/**********************************************/
/** SUPPLEMENTAL CODES
/**********************************************/

/****************************************************/
/** CODE 1: Code_Industry_Concentraion_OL **/
/****************************************************/

%let wrds = wrds-cloud.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

/** MERGING CRSP AND COMPUSTAT **/

/************************************************************************************
* STEP ONE: Extract Compustat data; 
************************************************************************************/
rsubmit;
data compx2;
   set comp.funda (keep = gvkey tic fyear fyr datadate SALE COGS AT INDFMT DATAFMT POPSRC CONSOL at lt csho prcc_f CEQ DLC DLTT DD1 DD2 DD3 DD4 DD5 
                          IB XINT TXT SPI WCAP ACT LCT FOPT RE LT SALE XSGA DV NI DM DCVT SICH/**SPLTICRM SIC**/ che txp dp AJEX XRD
                          MIB PSTK PPEGT INVT TXDI OIBDP); 
   if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
   /** create begin and end dates of fiscal year**/
   format endfyr begfyr date9.;
   endfyr= datadate;
   begfyr= intnx('month',endfyr,-11,'beg');  /* intnx(interval, from, n, 'aligment') */
run;

proc sort; by gvkey endfyr; 
run;

/*** step 1a***
merge with the current SIC codes
***/
rsubmit;
proc sql;
  create table compx3 as select *, b.sic, b.conm
  from compx2 as a, comp.names as b
  where a.gvkey = b.gvkey and
  year(a.datadate) le b.year2 and  year(a.datadate) ge b.year1;                
quit;

/*** step 1b***
add credit rating
***/
rsubmit;
proc sql;
create table compx4 as select a.*, b.splticrm as br
from compx3 a left join comp.adsprate b
on (a.gvkey = b.gvkey) and
(a.datadate = b.datadate);
quit; 

/*******************************************************************************************
* STEP TWO: Link GVKEYS to CRSP Identifiers;                                               *
* Use CCMXPF_LINKTABLE table to obtain CRSP identifiers for our subset of companies/dates; *
********************************************************************************************/

rsubmit;
data ccmxpf_linktable1;
set crsp.ccmxpf_linktable;;
gvkey1=gvkey*1;
run;

data compx5;
set compx4;
gvkey1=gvkey*1;
run;

rsubmit;
proc sql;
  create table mydata as select *
  from compx5 as a, ccmxpf_linktable1 as b
  where a.gvkey1 = b.gvkey1 and
  b.LINKTYPE in ("LU","LC","LD","LN","LS","LX") and
  b.usedflag=1 and (b.LINKDT <= a.endfyr or b.LINKDT = .B) and (a.endfyr <= b.LINKENDDT or b.LINKENDDT = .E);                
quit;

/***************************************************************************************
* STEP THREE: Add CRSP Monthly price data;
***************************************************************************************/

rsubmit;
proc sql;
    create table mydata2 as select *
    from mydata as a, crsp.msf as b
    where a.lpermno = b.permno 
    and	intck('month',a.endfyr,b.date) between 6 and 17;
quit;

/***************************************************************************************
* STEP FOUR: Creating variables;
***************************************************************************************/
rsubmit;
proc sort data=mydata2; by permno date;
run;

data file1;
set mydata2;
size=abs(prc)*shrout;
mtb=(size/1000)/ceq;
ebit=IB+XINT+TXT+SPI;
Payout=DV/ebit;
run;

data file2;
set file1;
/**keep Gvkey1 Tic PERMNO CUSIP Fyear Fyr Begfyr Endfyr DATE 
     AT CEQ Dlc Dltt DD1 DD2 DD3 DD4 DD5 HEXCD Size PRC RET RET1 MTB EBIT Payout br SICH sic;**/
run;

/***************************************************************************************
* Adding data from Compustat querterly file ;
***************************************************************************************/
rsubmit;
data fileq1;
set comp.fundq (keep = gvkey fyearq fyr fqtr SALEQ COGSQ IBQ XINTQ TXTQ SPIQ LTQ ATQ NIQ CHEQ SEQQ TXDITCQ PSTKQ datadate
                       DLCQ DLTTQ ACTQ LCTQ REQ CEQQ XSGAQ FOPTY
                       cshoq prccq XINTQ ACTQ LCTQ DVY txpq dpq AJEXQ XRDQ MIBQ PSTKQ PPEGTQ INVTQ TXDIQ);                     
run;

data fileq2;
set fileq1;
gvkey1=gvkey*1;
run;

proc sql;
create table file3 as select a.*, b.*
from file2 as a left join fileq2 as b
on a.gvkey1 = b.gvkey1 and intck('month', b.datadate, a.date) between 3 and 5;
quit;


***************************************************************
* Data cleaning
***************************************************************;
rsubmit;
proc sql;
create table mydata6 as select a.*, b.HSHRCD
from file3 as a left join crsp.msfhdr as b
on a.permno = b.permno;
quit;


data mydata7; set mydata6; 
if (HSHRCD > 14) or  (HSHRCD < 10) then delete; 
run;  

***************************************************************
* Operating leverage
***************************************************************;

rsubmit;
data oprt_lvg;
set mydata7;
OL1_FA_TA=(COGS+XSGA)/AT;
run;

rsubmit;
proc sort data=oprt_lvg;
by permno date;
run;

rsubmit;
data oprt_lvg2;
set oprt_lvg;
lp=lag12(permno);
lagOIBDP=lag12(OIBDP);
lagSALE=lag12(SALE);
if lp=permno then do;
l_OIBDP=lagOIBDP;
l_SALE=lagSALE;
end;
keep permno tic gvkey cusip date OL1_FA_TA OIBDP l_OIBDP SALE l_SALE sic;
run;

rsubmit;
data oprt_lvg3;
set oprt_lvg2;
pct_OIBDP=(OIBDP-l_OIBDP)/l_OIBDP;
pct_SALE=(SALE-l_SALE)/l_SALE;
pct_OIBDP_SALE=pct_OIBDP/pct_SALE;
run;

rsubmit;
proc sort data=oprt_lvg3;
by permno date;
run;

rsubmit;
data oprt_lvg4;
set oprt_lvg3;
lp=lag24(permno);
lag_pct_OIBDP_SALE=lag12(pct_OIBDP_SALE);
lag2_pct_OIBDP_SALE=lag24(pct_OIBDP_SALE);
if lp=permno then do;
l_pct_OIBDP_SALE=lag_pct_OIBDP_SALE;
l2_pct_OIBDP_SALE=lag2_pct_OIBDP_SALE;
end;
run;

rsubmit;
data oprt_lvg5;
set oprt_lvg4;
OL2_pct_OIBDP_SALE=(pct_OIBDP_SALE+l_pct_OIBDP_SALE+l2_pct_OIBDP_SALE)/3;
/**if OL1_FA_TA ge 0 and OL1_FA_TA le 1;
if OL2_pct_OIBDP_SALE ge -2 and OL2_pct_OIBDP_SALE le 2;**/
year=year(date);
month=month(date);
if year ge 1995 and year le 2017;
keep date year month permno tic gvkey cusip SALE OL1_FA_TA OL2_pct_OIBDP_SALE sic;
run;

rsubmit;
proc univariate data=oprt_lvg5;
var OL1_FA_TA OL2_pct_OIBDP_SALE;
run;

rsubmit;
proc download data=oprt_lvg5 out=asie.operating_leverage;
run;

***************************************************************
* Convert to the FF48 industry classifications
***************************************************************;

data asie.operating_leverage2;
	set asie.operating_leverage;
	
	if ( sic ge 0100 and sic le 0199) or ( sic ge 0200 and sic le 0299) or ( sic ge 0700 and sic le 0799) or ( sic ge 0910 and sic le 0919) or ( sic ge 2048 and sic le 2048) then ff48= 1;
	if ( sic ge 2000 and sic le 2009) or ( sic ge 2010 and sic le 2019) or ( sic ge 2020 and sic le 2029) or ( sic ge 2030 and sic le 2039) or ( sic ge 2040 and sic le 2046) or ( sic ge 2050 and sic le 2059) or ( sic ge 2060 and sic le 2063) or ( sic ge 2070 and sic le 2079) or ( sic ge 2090 and sic le 2092) or ( sic ge 2095 and sic le 2095) or ( sic ge 2098 and sic le 2099) then ff48= 2;
	if ( sic ge 2064 and sic le 2068) or ( sic ge 2086 and sic le 2086) or ( sic ge 2087 and sic le 2087) or ( sic ge 2096 and sic le 2096) or ( sic ge 2097 and sic le 2097) then ff48= 3;
	if ( sic ge 2080 and sic le 2080) or ( sic ge 2082 and sic le 2082) or ( sic ge 2083 and sic le 2083) or ( sic ge 2084 and sic le 2084) or ( sic ge 2085 and sic le 2085) then ff48= 4;
	if ( sic ge 2100 and sic le 2199) then ff48= 5;
	if ( sic ge 0920 and sic le 0999) or ( sic ge 3650 and sic le 3651) or ( sic ge 3652 and sic le 3652) or ( sic ge 3732 and sic le 3732) or ( sic ge 3930 and sic le 3931) or ( sic ge 3940 and sic le 3949) then ff48= 6;
	if ( sic ge 7800 and sic le 7829) or ( sic ge 7830 and sic le 7833) or ( sic ge 7840 and sic le 7841) or ( sic ge 7900 and sic le 7900) or ( sic ge 7910 and sic le 7911) or ( sic ge 7920 and sic le 7929) or ( sic ge 7930 and sic le 7933) or ( sic ge 7940 and sic le 7949) or ( sic ge 7980 and sic le 7980) or ( sic ge 7990 and sic le 7999) then ff48= 7;
	if ( sic ge 2700 and sic le 2709) or ( sic ge 2710 and sic le 2719) or ( sic ge 2720 and sic le 2729) or ( sic ge 2730 and sic le 2739) or ( sic ge 2740 and sic le 2749) or ( sic ge 2770 and sic le 2771) or ( sic ge 2780 and sic le 2789) or ( sic ge 2790 and sic le 2799) then ff48= 8;
	if ( sic ge 2047 and sic le 2047) or ( sic ge 2391 and sic le 2392) or ( sic ge 2510 and sic le 2519) or ( sic ge 2590 and sic le 2599) or ( sic ge 2840 and sic le 2843) or ( sic ge 2844 and sic le 2844) or ( sic ge 3160 and sic le 3161) or ( sic ge 3170 and sic le 3171) or ( sic ge 3172 and sic le 3172) or ( sic ge 3190 and sic le 3199) or ( sic ge 3229 and sic le 3229) or ( sic ge 3260 and sic le 3260) or ( sic ge 3262 and sic le 3263) or ( sic ge 3269 and sic le 3269) or ( sic ge 3230 and sic le 3231) or ( sic ge 3630 and sic le 3639) or ( sic ge 3750 and sic le 3751) or ( sic ge 3800 and sic le 3800) or ( sic ge 3860 and sic le 3861) or ( sic ge 3870 and sic le 3873) or ( sic ge 3910 and sic le 3911) or ( sic ge 3914 and sic le 3914) or ( sic ge 3915 and sic le 3915) or ( sic ge 3960 and sic le 3962) or ( sic ge 3991 and sic le 3991) or ( sic ge 3995 and sic le 3995) then ff48= 9;
	if ( sic ge 2300 and sic le 2390) or ( sic ge 3020 and sic le 3021) or ( sic ge 3100 and sic le 3111) or ( sic ge 3130 and sic le 3131) or ( sic ge 3140 and sic le 3149) or ( sic ge 3150 and sic le 3151) or ( sic ge 3963 and sic le 3965) then ff48=10;
	if ( sic ge 8000 and sic le 8099) then ff48=11;
	if ( sic ge 3693 and sic le 3693) or ( sic ge 3840 and sic le 3849) or ( sic ge 3850 and sic le 3851) then ff48=12;
	if ( sic ge 2830 and sic le 2830) or ( sic ge 2831 and sic le 2831) or ( sic ge 2833 and sic le 2833) or ( sic ge 2834 and sic le 2834) or ( sic ge 2835 and sic le 2835) or ( sic ge 2836 and sic le 2836) then ff48=13;
	if ( sic ge 2800 and sic le 2809) or ( sic ge 2810 and sic le 2819) or ( sic ge 2820 and sic le 2829) or ( sic ge 2850 and sic le 2859) or ( sic ge 2860 and sic le 2869) or ( sic ge 2870 and sic le 2879) or ( sic ge 2890 and sic le 2899) then ff48=14;
	if ( sic ge 3031 and sic le 3031) or ( sic ge 3041 and sic le 3041) or ( sic ge 3050 and sic le 3053) or ( sic ge 3060 and sic le 3069) or ( sic ge 3070 and sic le 3079) or ( sic ge 3080 and sic le 3089) or ( sic ge 3090 and sic le 3099) then ff48=15;
	if ( sic ge 2200 and sic le 2269) or ( sic ge 2270 and sic le 2279) or ( sic ge 2280 and sic le 2284) or ( sic ge 2290 and sic le 2295) or ( sic ge 2297 and sic le 2297) or ( sic ge 2298 and sic le 2298) or ( sic ge 2299 and sic le 2299) or ( sic ge 2393 and sic le 2395) or ( sic ge 2397 and sic le 2399) then ff48=16;
	if ( sic ge 0800 and sic le 0899) or ( sic ge 2400 and sic le 2439) or ( sic ge 2450 and sic le 2459) or ( sic ge 2490 and sic le 2499) or ( sic ge 2660 and sic le 2661) or ( sic ge 2950 and sic le 2952) or ( sic ge 3200 and sic le 3200) or ( sic ge 3210 and sic le 3211) or ( sic ge 3240 and sic le 3241) or ( sic ge 3250 and sic le 3259) or ( sic ge 3261 and sic le 3261) or ( sic ge 3264 and sic le 3264) or ( sic ge 3270 and sic le 3275) or ( sic ge 3280 and sic le 3281) or ( sic ge 3290 and sic le 3293) or ( sic ge 3295 and sic le 3299) or ( sic ge 3420 and sic le 3429) or ( sic ge 3430 and sic le 3433) or ( sic ge 3440 and sic le 3441) or ( sic ge 3442 and sic le 3442) or ( sic ge 3446 and sic le 3446) or ( sic ge 3448 and sic le 3448) or ( sic ge 3449 and sic le 3449) or ( sic ge 3450 and sic le 3451) or ( sic ge 3452 and sic le 3452) or ( sic ge 3490 and sic le 3499) or ( sic ge 3996 and sic le 3996) then ff48=17;
	if ( sic ge 1500 and sic le 1511) or ( sic ge 1520 and sic le 1529) or ( sic ge 1530 and sic le 1539) or ( sic ge 1540 and sic le 1549) or ( sic ge 1600 and sic le 1699) or ( sic ge 1700 and sic le 1799) then ff48=18;
	if ( sic ge 3300 and sic le 3300) or ( sic ge 3310 and sic le 3317) or ( sic ge 3320 and sic le 3325) or ( sic ge 3330 and sic le 3339) or ( sic ge 3340 and sic le 3341) or ( sic ge 3350 and sic le 3357) or ( sic ge 3360 and sic le 3369) or ( sic ge 3370 and sic le 3379) or ( sic ge 3390 and sic le 3399) then ff48=19;
	if ( sic ge 3400 and sic le 3400) or ( sic ge 3443 and sic le 3443) or ( sic ge 3444 and sic le 3444) or ( sic ge 3460 and sic le 3469) or ( sic ge 3470 and sic le 3479) then ff48=20;
	if ( sic ge 3510 and sic le 3519) or ( sic ge 3520 and sic le 3529) or ( sic ge 3530 and sic le 3530) or ( sic ge 3531 and sic le 3531) or ( sic ge 3532 and sic le 3532) or ( sic ge 3533 and sic le 3533) or ( sic ge 3534 and sic le 3534) or ( sic ge 3535 and sic le 3535) or ( sic ge 3536 and sic le 3536) or ( sic ge 3538 and sic le 3538) or ( sic ge 3540 and sic le 3549) or ( sic ge 3550 and sic le 3559) or ( sic ge 3560 and sic le 3569) or ( sic ge 3580 and sic le 3580) or ( sic ge 3581 and sic le 3581) or ( sic ge 3582 and sic le 3582) or ( sic ge 3585 and sic le 3585) or ( sic ge 3586 and sic le 3586) or ( sic ge 3589 and sic le 3589) or ( sic ge 3590 and sic le 3599) then ff48=21;
	if ( sic ge 3600 and sic le 3600) or ( sic ge 3610 and sic le 3613) or ( sic ge 3620 and sic le 3621) or ( sic ge 3623 and sic le 3629) or ( sic ge 3640 and sic le 3644) or ( sic ge 3645 and sic le 3645) or ( sic ge 3646 and sic le 3646) or ( sic ge 3648 and sic le 3649) or ( sic ge 3660 and sic le 3660) or ( sic ge 3690 and sic le 3690) or ( sic ge 3691 and sic le 3692) or ( sic ge 3699 and sic le 3699) then ff48=22;
	if ( sic ge 2296 and sic le 2296) or ( sic ge 2396 and sic le 2396) or ( sic ge 3010 and sic le 3011) or ( sic ge 3537 and sic le 3537) or ( sic ge 3647 and sic le 3647) or ( sic ge 3694 and sic le 3694) or ( sic ge 3700 and sic le 3700) or ( sic ge 3710 and sic le 3710) or ( sic ge 3711 and sic le 3711) or ( sic ge 3713 and sic le 3713) or ( sic ge 3714 and sic le 3714) or ( sic ge 3715 and sic le 3715) or ( sic ge 3716 and sic le 3716) or ( sic ge 3792 and sic le 3792) or ( sic ge 3790 and sic le 3791) or ( sic ge 3799 and sic le 3799) then ff48=23;
	if ( sic ge 3720 and sic le 3720) or ( sic ge 3721 and sic le 3721) or ( sic ge 3723 and sic le 3724) or ( sic ge 3725 and sic le 3725) or ( sic ge 3728 and sic le 3729) then ff48=24;
	if ( sic ge 3730 and sic le 3731) or ( sic ge 3740 and sic le 3743) then ff48=25;
	if ( sic ge 3760 and sic le 3769) or ( sic ge 3795 and sic le 3795) or ( sic ge 3480 and sic le 3489) then ff48=26;
	if ( sic ge 1040 and sic le 1049) then ff48=27;
	if ( sic ge 1000 and sic le 1009) or ( sic ge 1010 and sic le 1019) or ( sic ge 1020 and sic le 1029) or ( sic ge 1030 and sic le 1039) or ( sic ge 1050 and sic le 1059) or ( sic ge 1060 and sic le 1069) or ( sic ge 1070 and sic le 1079) or ( sic ge 1080 and sic le 1089) or ( sic ge 1090 and sic le 1099) or ( sic ge 1100 and sic le 1119) or ( sic ge 1400 and sic le 1499) then ff48=28;
	if ( sic ge 1200 and sic le 1299) then ff48=29;
	if ( sic ge 1300 and sic le 1300) or ( sic ge 1310 and sic le 1319) or ( sic ge 1320 and sic le 1329) or ( sic ge 1330 and sic le 1339) or ( sic ge 1370 and sic le 1379) or ( sic ge 1380 and sic le 1380) or ( sic ge 1381 and sic le 1381) or ( sic ge 1382 and sic le 1382) or ( sic ge 1389 and sic le 1389) or ( sic ge 2900 and sic le 2912) or ( sic ge 2990 and sic le 2999) then ff48=30;
	if ( sic ge 4900 and sic le 4900) or ( sic ge 4910 and sic le 4911) or ( sic ge 4920 and sic le 4922) or ( sic ge 4923 and sic le 4923) or ( sic ge 4924 and sic le 4925) or ( sic ge 4930 and sic le 4931) or ( sic ge 4932 and sic le 4932) or ( sic ge 4939 and sic le 4939) or ( sic ge 4940 and sic le 4942) then ff48=31;
	if ( sic ge 4800 and sic le 4800) or ( sic ge 4810 and sic le 4813) or ( sic ge 4820 and sic le 4822) or ( sic ge 4830 and sic le 4839) or ( sic ge 4840 and sic le 4841) or ( sic ge 4880 and sic le 4889) or ( sic ge 4890 and sic le 4890) or ( sic ge 4891 and sic le 4891) or ( sic ge 4892 and sic le 4892) or ( sic ge 4899 and sic le 4899) then ff48=32;
	if ( sic ge 7020 and sic le 7021) or ( sic ge 7030 and sic le 7033) or ( sic ge 7200 and sic le 7200) or ( sic ge 7210 and sic le 7212) or ( sic ge 7214 and sic le 7214) or ( sic ge 7215 and sic le 7216) or ( sic ge 7217 and sic le 7217) or ( sic ge 7219 and sic le 7219) or ( sic ge 7220 and sic le 7221) or ( sic ge 7230 and sic le 7231) or ( sic ge 7240 and sic le 7241) or ( sic ge 7250 and sic le 7251) or ( sic ge 7260 and sic le 7269) or ( sic ge 7270 and sic le 7290) or ( sic ge 7291 and sic le 7291) or ( sic ge 7292 and sic le 7299) or ( sic ge 7395 and sic le 7395) or ( sic ge 7500 and sic le 7500) or ( sic ge 7520 and sic le 7529) or ( sic ge 7530 and sic le 7539) or ( sic ge 7540 and sic le 7549) or ( sic ge 7600 and sic le 7600) or ( sic ge 7620 and sic le 7620) or ( sic ge 7622 and sic le 7622) or ( sic ge 7623 and sic le 7623) or ( sic ge 7629 and sic le 7629) or ( sic ge 7630 and sic le 7631) or ( sic ge 7640 and sic le 7641) or ( sic ge 7690 and sic le 7699) or ( sic ge 8100 and sic le 8199) or ( sic ge 8200 and sic le 8299) or ( sic ge 8300 and sic le 8399) or ( sic ge 8400 and sic le 8499) or ( sic ge 8600 and sic le 8699) or ( sic ge 8800 and sic le 8899) or ( sic ge 7510 and sic le 7515) then ff48=33;
	if ( sic ge 2750 and sic le 2759) or ( sic ge 3993 and sic le 3993) or ( sic ge 7218 and sic le 7218) or ( sic ge 7300 and sic le 7300) or ( sic ge 7310 and sic le 7319) or ( sic ge 7320 and sic le 7329) or ( sic ge 7330 and sic le 7339) or ( sic ge 7340 and sic le 7342) or ( sic ge 7349 and sic le 7349) or ( sic ge 7350 and sic le 7351) or ( sic ge 7352 and sic le 7352) or ( sic ge 7353 and sic le 7353) or ( sic ge 7359 and sic le 7359) or ( sic ge 7360 and sic le 7369) or ( sic ge 7370 and sic le 7372) or ( sic ge 7374 and sic le 7374) or ( sic ge 7375 and sic le 7375) or ( sic ge 7376 and sic le 7376) or ( sic ge 7377 and sic le 7377) or ( sic ge 7378 and sic le 7378) or ( sic ge 7379 and sic le 7379) or ( sic ge 7380 and sic le 7380) or ( sic ge 7381 and sic le 7382) or ( sic ge 7383 and sic le 7383) or ( sic ge 7384 and sic le 7384) or ( sic ge 7385 and sic le 7385) or ( sic ge 7389 and sic le 7390) or ( sic ge 7391 and sic le 7391) or ( sic ge 7392 and sic le 7392) or ( sic ge 7393 and sic le 7393) or ( sic ge 7394 and sic le 7394) or ( sic ge 7396 and sic le 7396) or ( sic ge 7397 and sic le 7397) or ( sic ge 7399 and sic le 7399) or ( sic ge 7519 and sic le 7519) or ( sic ge 8700 and sic le 8700) or ( sic ge 8710 and sic le 8713) or ( sic ge 8720 and sic le 8721) or ( sic ge 8730 and sic le 8734) or ( sic ge 8740 and sic le 8748) or ( sic ge 8900 and sic le 8910) or ( sic ge 8911 and sic le 8911) or ( sic ge 8920 and sic le 8999) or ( sic ge 4220 and sic le 4229) then ff48=34;
	if ( sic ge 3570 and sic le 3579) or ( sic ge 3680 and sic le 3680) or ( sic ge 3681 and sic le 3681) or ( sic ge 3682 and sic le 3682) or ( sic ge 3683 and sic le 3683) or ( sic ge 3684 and sic le 3684) or ( sic ge 3685 and sic le 3685) or ( sic ge 3686 and sic le 3686) or ( sic ge 3687 and sic le 3687) or ( sic ge 3688 and sic le 3688) or ( sic ge 3689 and sic le 3689) or ( sic ge 3695 and sic le 3695) or ( sic ge 7373 and sic le 7373) then ff48=35;
	if ( sic ge 3622 and sic le 3622) or ( sic ge 3661 and sic le 3661) or ( sic ge 3662 and sic le 3662) or ( sic ge 3663 and sic le 3663) or ( sic ge 3664 and sic le 3664) or ( sic ge 3665 and sic le 3665) or ( sic ge 3666 and sic le 3666) or ( sic ge 3669 and sic le 3669) or ( sic ge 3670 and sic le 3679) or ( sic ge 3810 and sic le 3810) or ( sic ge 3812 and sic le 3812) then ff48=36;
	if ( sic ge 3811 and sic le 3811) or ( sic ge 3820 and sic le 3820) or ( sic ge 3821 and sic le 3821) or ( sic ge 3822 and sic le 3822) or ( sic ge 3823 and sic le 3823) or ( sic ge 3824 and sic le 3824) or ( sic ge 3825 and sic le 3825) or ( sic ge 3826 and sic le 3826) or ( sic ge 3827 and sic le 3827) or ( sic ge 3829 and sic le 3829) or ( sic ge 3830 and sic le 3839) then ff48=37;
	if ( sic ge 2520 and sic le 2549) or ( sic ge 2600 and sic le 2639) or ( sic ge 2670 and sic le 2699) or ( sic ge 2760 and sic le 2761) or ( sic ge 3950 and sic le 3955) then ff48=38;
	if ( sic ge 2440 and sic le 2449) or ( sic ge 2640 and sic le 2659) or ( sic ge 3220 and sic le 3221) or ( sic ge 3410 and sic le 3412) then ff48=39;
	if ( sic ge 4000 and sic le 4013) or ( sic ge 4040 and sic le 4049) or ( sic ge 4100 and sic le 4100) or ( sic ge 4110 and sic le 4119) or ( sic ge 4120 and sic le 4121) or ( sic ge 4130 and sic le 4131) or ( sic ge 4140 and sic le 4142) or ( sic ge 4150 and sic le 4151) or ( sic ge 4170 and sic le 4173) or ( sic ge 4190 and sic le 4199) or ( sic ge 4200 and sic le 4200) or ( sic ge 4210 and sic le 4219) or ( sic ge 4230 and sic le 4231) or ( sic ge 4240 and sic le 4249) or ( sic ge 4400 and sic le 4499) or ( sic ge 4500 and sic le 4599) or ( sic ge 4600 and sic le 4699) or ( sic ge 4700 and sic le 4700) or ( sic ge 4710 and sic le 4712) or ( sic ge 4720 and sic le 4729) or ( sic ge 4730 and sic le 4739) or ( sic ge 4740 and sic le 4749) or ( sic ge 4780 and sic le 4780) or ( sic ge 4782 and sic le 4782) or ( sic ge 4783 and sic le 4783) or ( sic ge 4784 and sic le 4784) or ( sic ge 4785 and sic le 4785) or ( sic ge 4789 and sic le 4789) then ff48=40;
	if ( sic ge 5000 and sic le 5000) or ( sic ge 5010 and sic le 5015) or ( sic ge 5020 and sic le 5023) or ( sic ge 5030 and sic le 5039) or ( sic ge 5040 and sic le 5042) or ( sic ge 5043 and sic le 5043) or ( sic ge 5044 and sic le 5044) or ( sic ge 5045 and sic le 5045) or ( sic ge 5046 and sic le 5046) or ( sic ge 5047 and sic le 5047) or ( sic ge 5048 and sic le 5048) or ( sic ge 5049 and sic le 5049) or ( sic ge 5050 and sic le 5059) or ( sic ge 5060 and sic le 5060) or ( sic ge 5063 and sic le 5063) or ( sic ge 5064 and sic le 5064) or ( sic ge 5065 and sic le 5065) or ( sic ge 5070 and sic le 5078) or ( sic ge 5080 and sic le 5080) or ( sic ge 5081 and sic le 5081) or ( sic ge 5082 and sic le 5082) or ( sic ge 5083 and sic le 5083) or ( sic ge 5084 and sic le 5084) or ( sic ge 5085 and sic le 5085) or ( sic ge 5086 and sic le 5087) or ( sic ge 5088 and sic le 5088) or ( sic ge 5090 and sic le 5090) or ( sic ge 5091 and sic le 5092) or ( sic ge 5093 and sic le 5093) or ( sic ge 5094 and sic le 5094) or ( sic ge 5099 and sic le 5099) or ( sic ge 5100 and sic le 5100) or ( sic ge 5110 and sic le 5113) or ( sic ge 5120 and sic le 5122) or ( sic ge 5130 and sic le 5139) or ( sic ge 5140 and sic le 5149) or ( sic ge 5150 and sic le 5159) or ( sic ge 5160 and sic le 5169) or ( sic ge 5170 and sic le 5172) or ( sic ge 5180 and sic le 5182) or ( sic ge 5190 and sic le 5199) then ff48=41;
	if ( sic ge 5200 and sic le 5200) or ( sic ge 5210 and sic le 5219) or ( sic ge 5220 and sic le 5229) or ( sic ge 5230 and sic le 5231) or ( sic ge 5250 and sic le 5251) or ( sic ge 5260 and sic le 5261) or ( sic ge 5270 and sic le 5271) or ( sic ge 5300 and sic le 5300) or ( sic ge 5310 and sic le 5311) or ( sic ge 5320 and sic le 5320) or ( sic ge 5330 and sic le 5331) or ( sic ge 5334 and sic le 5334) or ( sic ge 5340 and sic le 5349) or ( sic ge 5390 and sic le 5399) or ( sic ge 5400 and sic le 5400) or ( sic ge 5410 and sic le 5411) or ( sic ge 5412 and sic le 5412) or ( sic ge 5420 and sic le 5429) or ( sic ge 5430 and sic le 5439) or ( sic ge 5440 and sic le 5449) or ( sic ge 5450 and sic le 5459) or ( sic ge 5460 and sic le 5469) or ( sic ge 5490 and sic le 5499) or ( sic ge 5500 and sic le 5500) or ( sic ge 5510 and sic le 5529) or ( sic ge 5530 and sic le 5539) or ( sic ge 5540 and sic le 5549) or ( sic ge 5550 and sic le 5559) or ( sic ge 5560 and sic le 5569) or ( sic ge 5570 and sic le 5579) or ( sic ge 5590 and sic le 5599) or ( sic ge 5600 and sic le 5699) or ( sic ge 5700 and sic le 5700) or ( sic ge 5710 and sic le 5719) or ( sic ge 5720 and sic le 5722) or ( sic ge 5730 and sic le 5733) or ( sic ge 5734 and sic le 5734) or ( sic ge 5735 and sic le 5735) or ( sic ge 5736 and sic le 5736) or ( sic ge 5750 and sic le 5799) or ( sic ge 5900 and sic le 5900) or ( sic ge 5910 and sic le 5912) or ( sic ge 5920 and sic le 5929) or ( sic ge 5930 and sic le 5932) or ( sic ge 5940 and sic le 5940) or ( sic ge 5941 and sic le 5941) or ( sic ge 5942 and sic le 5942) or ( sic ge 5943 and sic le 5943) or ( sic ge 5944 and sic le 5944) or ( sic ge 5945 and sic le 5945) or ( sic ge 5946 and sic le 5946) or ( sic ge 5947 and sic le 5947) or ( sic ge 5948 and sic le 5948) or ( sic ge 5949 and sic le 5949) or ( sic ge 5950 and sic le 5959) or ( sic ge 5960 and sic le 5969) or ( sic ge 5970 and sic le 5979) or ( sic ge 5980 and sic le 5989) or ( sic ge 5990 and sic le 5990) or ( sic ge 5992 and sic le 5992) or ( sic ge 5993 and sic le 5993) or ( sic ge 5994 and sic le 5994) or ( sic ge 5995 and sic le 5995) or ( sic ge 5999 and sic le 5999) then ff48=42;
	if ( sic ge 5800 and sic le 5819) or ( sic ge 5820 and sic le 5829) or ( sic ge 5890 and sic le 5899) or ( sic ge 7000 and sic le 7000) or ( sic ge 7010 and sic le 7019) or ( sic ge 7040 and sic le 7049) or ( sic ge 7213 and sic le 7213) then ff48=43;
	if ( sic ge 6000 and sic le 6000) or ( sic ge 6010 and sic le 6019) or ( sic ge 6020 and sic le 6020) or ( sic ge 6021 and sic le 6021) or ( sic ge 6022 and sic le 6022) or ( sic ge 6023 and sic le 6024) or ( sic ge 6025 and sic le 6025) or ( sic ge 6026 and sic le 6026) or ( sic ge 6027 and sic le 6027) or ( sic ge 6028 and sic le 6029) or ( sic ge 6030 and sic le 6036) or ( sic ge 6040 and sic le 6059) or ( sic ge 6060 and sic le 6062) or ( sic ge 6080 and sic le 6082) or ( sic ge 6090 and sic le 6099) or ( sic ge 6100 and sic le 6100) or ( sic ge 6110 and sic le 6111) or ( sic ge 6112 and sic le 6113) or ( sic ge 6120 and sic le 6129) or ( sic ge 6130 and sic le 6139) or ( sic ge 6140 and sic le 6149) or ( sic ge 6150 and sic le 6159) or ( sic ge 6160 and sic le 6169) or ( sic ge 6170 and sic le 6179) or ( sic ge 6190 and sic le 6199) then ff48=44;
	if ( sic ge 6300 and sic le 6300) or ( sic ge 6310 and sic le 6319) or ( sic ge 6320 and sic le 6329) or ( sic ge 6330 and sic le 6331) or ( sic ge 6350 and sic le 6351) or ( sic ge 6360 and sic le 6361) or ( sic ge 6370 and sic le 6379) or ( sic ge 6390 and sic le 6399) or ( sic ge 6400 and sic le 6411) then ff48=45;
	if ( sic ge 6500 and sic le 6500) or ( sic ge 6510 and sic le 6510) or ( sic ge 6512 and sic le 6512) or ( sic ge 6513 and sic le 6513) or ( sic ge 6514 and sic le 6514) or ( sic ge 6515 and sic le 6515) or ( sic ge 6517 and sic le 6519) or ( sic ge 6520 and sic le 6529) or ( sic ge 6530 and sic le 6531) or ( sic ge 6532 and sic le 6532) or ( sic ge 6540 and sic le 6541) or ( sic ge 6550 and sic le 6553) or ( sic ge 6590 and sic le 6599) or ( sic ge 6610 and sic le 6611) then ff48=46;
	if ( sic ge 6200 and sic le 6299) or ( sic ge 6700 and sic le 6700) or ( sic ge 6710 and sic le 6719) or ( sic ge 6720 and sic le 6722) or ( sic ge 6723 and sic le 6723) or ( sic ge 6724 and sic le 6724) or ( sic ge 6725 and sic le 6725) or ( sic ge 6726 and sic le 6726) or ( sic ge 6730 and sic le 6733) or ( sic ge 6740 and sic le 6779) or ( sic ge 6790 and sic le 6791) or ( sic ge 6792 and sic le 6792) or ( sic ge 6793 and sic le 6793) or ( sic ge 6794 and sic le 6794) or ( sic ge 6795 and sic le 6795) or ( sic ge 6798 and sic le 6798) or ( sic ge 6799 and sic le 6799) then ff48=47;
	if ( sic ge 4950 and sic le 4959) or ( sic ge 4960 and sic le 4961) or ( sic ge 4970 and sic le 4971) or ( sic ge 4990 and sic le 4991) then ff48=48;

	run;


/** add the GIC **/

proc sql;
  create table asie.operating_leverage3 as
  select a.*,b.*
    from asie.operating_leverage2 as a left join asie.firm_gic as b
    on a.tic=b.ticker 
    and a.year=b.fyear
	and a.month=b.fmonth;
 run;


/****************************************************/
/** get the industry-mean avergae operating leverage 
/****************************************************/

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars= OL1_FA_TA OL2_pct_OIBDP_SALE;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.operating_leverage3; by year; run;

proc means data=asie.operating_leverage3 min p1 p99 max median mean n noprint;
by year;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.operating_leverage3 as a left join asie.outliers as b
on a.year=b.year;
quit;

data asie.operating_leverage3; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.operating_leverage3; by permno year; run;

data asie.operating_leverage4; set asie.operating_leverage3; run;


/** avergae operating leverage by GIC **/

proc sort data=asie.operating_leverage4;
by year month gic;
run;

proc means data=asie.operating_leverage4 noprint;
by year month gic;
var OL1_FA_TA OL2_pct_OIBDP_SALE; 
output out=asie.operating_leverage4_mean mean=ind_GIC_OL1 ind_GIC_OL2;
proc print;
run;

proc sort data=asie.operating_leverage4_mean;
by year month ind_GIC_OL1;
run;

proc rank groups=2 data=asie.operating_leverage4_mean out=asie.operating_leverage4_rank;
var ind_GIC_OL1; 
ranks r_GIC_OL1;
by year month;
run;

data asie.GIC_OL1;
set asie.operating_leverage4_rank;
if GIC ne .;
keep GIC year month ind_GIC_OL1 r_GIC_OL1;
proc print;
run;

proc sql;
  create table asie.operating_leverage5 as
  select a.*,b.*
    from asie.operating_leverage4 as a left join asie.operating_leverage4_rank as b 
    on a.year=b.year 
    and a.month=b.month
    and a.gic=b.gic;
 run;

/** avergae operating leverage by ff48 **/

proc sort data=asie.operating_leverage5;
by year month ff48;
run;

proc means data=asie.operating_leverage5 noprint;
by year month ff48;
var OL1_FA_TA OL2_pct_OIBDP_SALE; 
output out=asie.operating_leverage5_mean mean=ind_ff48_OL1 ind_ff48_OL2;
proc print;
run;

proc sort data=asie.operating_leverage5_mean;
by year month ind_ff48_OL1;
run;

proc rank groups=2 data=asie.operating_leverage5_mean out=asie.operating_leverage5_rank;
var ind_ff48_OL1; 
ranks r_ff48_OL1;
by year month;
run;

data asie.ff48_OL1;
set asie.operating_leverage5_rank;
if ff48 ne .;
keep ff48 year month ind_ff48_OL1 r_ff48_OL1;
proc print;
run;

proc sql;
  create table asie.operating_leverage6 as
  select a.*,b.*
    from asie.operating_leverage5 as a left join asie.operating_leverage5_rank as b 
    on a.year=b.year 
    and a.month=b.month
    and a.ff48=b.ff48;
 run;


/****************************************************/
/** get the industry-concentration by HH index 
/****************************************************/

/** HH for GIC **/

proc sort data=asie.operating_leverage6;
by year month gic;
run;

proc means data=asie.operating_leverage6 noprint;
by year month gic;
var SALE; 
output out=asie.operating_leverage6_sum sum=sum_GIC_SALE;
proc print;
run;

proc sql;
  create table asie.operating_leverage7 as
  select a.*,b.*
    from asie.operating_leverage6 as a left join asie.operating_leverage6_sum as b 
    on a.year=b.year 
    and a.month=b.month
    and a.gic=b.gic;
 run;

data asie.operating_leverage8;
set asie.operating_leverage7;
pct_sale_2=(SALE/sum_GIC_SALE)**2;
run;

proc sort data=asie.operating_leverage8;
by year month gic;
run;

proc means data=asie.operating_leverage8 noprint;
by year month gic;
var pct_sale_2; 
output out=asie.operating_leverage8_HH sum=HH_GIC_SALE;
proc print;
run;

proc sort data=asie.operating_leverage8_HH;
by year month HH_GIC_SALE;
run;

proc rank groups=2 data=asie.operating_leverage8_HH out=asie.operating_leverage8_HH_rank;
var HH_GIC_SALE; 
ranks r_HH_GIC_SALE;
by year month;
run;

data asie.GIC_HH;
set asie.operating_leverage8_HH_rank;
if GIC ne .;
keep GIC year month HH_GIC_SALE r_HH_GIC_SALE;
proc print;
run;

proc sql;
  create table asie.operating_leverage9 as
  select a.*,b.*
    from asie.operating_leverage6 as a left join asie.operating_leverage8_HH_rank as b 
    on a.year=b.year 
    and a.month=b.month
    and a.gic=b.gic;
 run;

/** HH for ff48 **/

proc sort data=asie.operating_leverage9;
by year month ff48;
run;

proc means data=asie.operating_leverage9 noprint;
by year month ff48;
var SALE; 
output out=asie.operating_leverage9_sum sum=sum_ff48_SALE;
proc print;
run;

proc sql;
  create table asie.operating_leverage10 as
  select a.*,b.*
    from asie.operating_leverage9 as a left join asie.operating_leverage9_sum as b 
    on a.year=b.year 
    and a.month=b.month
    and a.ff48=b.ff48;
 run;

data asie.operating_leverage11;
set asie.operating_leverage10;
pct_sale_2=(SALE/sum_ff48_SALE)**2;
run;

proc sort data=asie.operating_leverage11;
by year month ff48;
run;

proc means data=asie.operating_leverage11 noprint;
by year month ff48;
var pct_sale_2; 
output out=asie.operating_leverage11_HH sum=HH_ff48_SALE;
proc print;
run;

proc sort data=asie.operating_leverage11_HH;
by year month HH_ff48_SALE;
run;

proc rank groups=2 data=asie.operating_leverage11_HH out=asie.operating_leverage11_HH_rank;
var HH_ff48_SALE; 
ranks r_HH_ff48_SALE;
by year month;
run;

data asie.ff48_HH;
set asie.operating_leverage11_HH_rank;
if ff48 ne .;
keep ff48 year month HH_ff48_SALE r_HH_ff48_SALE;
proc print;
run;

proc sql;
  create table asie.operating_leverage12 as
  select a.*,b.*
    from asie.operating_leverage9 as a left join asie.operating_leverage11_HH_rank as b 
    on a.year=b.year 
    and a.month=b.month
    and a.ff48=b.ff48;
 run;

data asie.GIC_ff48_OL_HH;
set asie.operating_leverage12;
keep year month tic r_GIC_OL1 r_ff48_OL1 r_HH_GIC_SALE r_HH_ff48_SALE;
run;


/****************************************************/
/** CODE 2: Code_HP_SIM **/
/****************************************************/

/** generate the firm's product similarity score **/

data asie.tnic3_data_2;
set asie.tnic3_data;
if year ge 1995 and year le 2017;
run;

proc sort data=asie.tnic3_data_2;
by year gvkey1 gvkey2;
run;

proc means data=asie.tnic3_data_2 noprint;
by year gvkey1;
var score;
output out=asie.sim sum=sim;
run;

data asie.sim2;
set asie.sim;
if sim=. then sim=0;
run;

data asie.permno_cusip_gvkey_tic_sic_y;
set asie.permno_cusip_gvkey_tic_sic;
year_y=year(date);
month_y=month(date);
ngvkey=gvkey*1;
if month_y=12;
keep tic ngvkey year_y;
run;

proc sql;
  create table asie.sim3 as
  select a.*,b.*
    from asie.sim2 as a left join asie.permno_cusip_gvkey_tic_sic_y as b 
    on a.gvkey1=b.ngvkey
	and a.year=b.year_y;
 run; 

proc sort data=asie.sim3 nodupkey;
by year gvkey1;
run;


/****************************************************/
/** CODE 3: Code_SimpleCount_Base **/
/****************************************************/

/** work on the base mention file **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count2;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
run;

/** add GIC industry codes for the filing firmns and the mentioned firms **/

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.firm_gic_2 as b
    on a.filingticker=b.ticker 
    and a.byear=b.fyear
	and a.bmonth=b.fmonth;
 run;

data asie.simple_count2_b;
set asie.simple_count2_a;
f_gic=gic;
keep filingticker mentionedticker byear bmonth f_gic;
run;

proc sql;
  create table asie.simple_count2_c as
  select a.*,b.*
    from asie.simple_count2_b as a left join asie.firm_gic_2 as b
    on a.mentionedticker=b.ticker 
    and a.byear=b.fyear
	and a.bmonth=b.fmonth;
 run;

data asie.simple_count2_d;
set asie.simple_count2_c;
m_gic=gic;
keep filingticker mentionedticker byear bmonth f_gic m_gic;
run;

proc sort data=asie.simple_count2_d;
by filingticker byear bmonth mentionedticker;
run;

data asie.simple_count2_inter;
set asie.simple_count2_d;
if f_gic ne . and m_gic ne . and f_gic ne m_gic;
proc print;
run;

data asie.simple_count2_in;
set asie.simple_count2_d;
if f_gic ne . and m_gic ne . and f_gic=m_gic;
proc print;
run;

/** count only the mentioned firms; all firms: 'simple_count2', outsdie-industry: 'simple_count2_inter', inside-industry: 'simple_count2_in'**/

proc sort data=asie.simple_count2;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

/** get mean and sum of firm size of the mentioning firms **/

data asie.main_file1;
set asie.firm_characteristics;
if year ge 1994;
keep tic year month l_size;
run;

proc sql;
  create table asie.simple_count2_size1 as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.main_file1 as b /** change to 'simple_count2_inter' and 'simple_count2_in' **/
    on a.filingticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

proc sort data=asie.simple_count2_size1 nodupkey;
by mentionedticker filingticker byear bmonth;
run;

proc sort data=asie.simple_count2_size1;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var l_size; 
output out=asie.simple_size3 n=size_count sum=sum_size max=max_size min=min_size;
proc print data=asie.simple_size3;
run;

proc sql;
  create table asie.simple_count3a as
  select a.*,b.*
    from asie.simple_count3 as a left join asie.simple_size3 as b
    on a.mentionedticker=b.mentionedticker 
    and a.byear=b.byear
	and a.bmonth=b.bmonth;
 run;

/*************************************/

data asie.mentioned_firms1;
set asie.simple_count3a;
keep mentionedticker;
run;

/** list of all mentioned firms **/

proc sort data=asie.mentioned_firms1 nodupkey;
by mentionedticker;
run;

/** add all months to all mentioned firms **/

proc print data=asie.months1;
run;

proc sql;
  create table asie.mentioned_firms2 as
  select a.*,b.*
    from asie.months1 as a, asie.mentioned_firms1 as b;
 run;

proc sort data=asie.mentioned_firms2;
by mentionedticker year month;
run;

/** merge the file with the mentiones to the file with all mentioned firms in all months **/

proc sql;
  create table asie.simple_count4 as
  select a.*,b.*
    from asie.mentioned_firms2 as a left join asie.simple_count3a as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear
	and a.month=b.bmonth;
 run;

data asie.simple_count5;
set asie.simple_count4;
if mention_count=. then mention_count=0;
if size_count=. then size_count=0;
if sum_size= . then sum_size=0;
/**if max_size =. then max_size=0;
if min_size =. then min_size=0;**/
run;

/** sum all mentions in the last 12 months **/

proc sort data=asie.simple_count5;
by mentionedticker year month;
run;

data asie.simple_count6;
set asie.simple_count5;
l11mt=lag11(mentionedticker);

lag1mc=lag(mention_count);
lag2mc=lag2(mention_count);
lag3mc=lag3(mention_count);
lag4mc=lag4(mention_count);
lag5mc=lag5(mention_count);
lag6mc=lag6(mention_count);
lag7mc=lag7(mention_count);
lag8mc=lag8(mention_count);
lag9mc=lag9(mention_count);
lag10mc=lag10(mention_count);
lag11mc=lag11(mention_count);

lag1sc=lag(size_count);
lag2sc=lag2(size_count);
lag3sc=lag3(size_count);
lag4sc=lag4(size_count);
lag5sc=lag5(size_count);
lag6sc=lag6(size_count);
lag7sc=lag7(size_count);
lag8sc=lag8(size_count);
lag9sc=lag9(size_count);
lag10sc=lag10(size_count);
lag11sc=lag11(size_count);

lag1s_size=lag(sum_size);
lag2s_size=lag2(sum_size);
lag3s_size=lag3(sum_size);
lag4s_size=lag4(sum_size);
lag5s_size=lag5(sum_size);
lag6s_size=lag6(sum_size);
lag7s_size=lag7(sum_size);
lag8s_size=lag8(sum_size);
lag9s_size=lag9(sum_size);
lag10s_size=lag10(sum_size);
lag11s_size=lag11(sum_size);

lag1max_size=lag(max_size);
lag2max_size=lag2(max_size);
lag3max_size=lag3(max_size);
lag4max_size=lag4(max_size);
lag5max_size=lag5(max_size);
lag6max_size=lag6(max_size);
lag7max_size=lag7(max_size);
lag8max_size=lag8(max_size);
lag9max_size=lag9(max_size);
lag10max_size=lag10(max_size);
lag11max_size=lag11(max_size);

lag1min_size=lag(min_size);
lag2min_size=lag2(min_size);
lag3min_size=lag3(min_size);
lag4min_size=lag4(min_size);
lag5min_size=lag5(min_size);
lag6min_size=lag6(min_size);
lag7min_size=lag7(min_size);
lag8min_size=lag8(min_size);
lag9min_size=lag9(min_size);
lag10min_size=lag10(min_size);
lag11min_size=lag11(min_size);

if l11mt=mentionedticker then do;
simple_count=mention_count+lag1mc+lag2mc+lag3mc+lag4mc+lag5mc+lag6mc+lag7mc+lag8mc+lag9mc+lag10mc+lag11mc;
simple_size_count=size_count+lag1sc+lag2sc+lag3sc+lag4sc+lag5sc+lag6sc+lag7sc+lag8sc+lag9sc+lag10sc+lag11sc;
simple_sum_size=sum_size+lag1s_size+lag2s_size+lag3s_size+lag4s_size+lag5s_size+lag6s_size+lag7s_size+lag8s_size+lag9s_size+lag10s_size+lag11s_size;
simple_mean_size=simple_sum_size/simple_size_count;
simple_max_size=max(max_size,lag1max_size,lag2max_size,lag3max_size,lag4max_size,lag5max_size,lag6max_size,lag7max_size,lag8max_size,lag9max_size,lag10max_size,lag11max_size);
simple_min_size=min(min_size,lag1min_size,lag2min_size,lag3min_size,lag4min_size,lag5min_size,lag6min_size,lag7min_size,lag8min_size,lag9min_size,lag10min_size,lag11min_size);
end;
drop byear bmonth _TYPE_ _FREQ_ l11mt lag1mc lag2mc lag3mc lag4mc lag5mc lag6mc lag7mc lag8mc lag9mc lag10mc lag11mc
     l11mt lag1sc lag2sc lag3sc lag4sc lag5sc lag6sc lag7sc lag8sc lag9sc lag10sc lag11sc
     l11mt lag1s_size lag2s_size lag3s_size lag4s_size lag5s_size lag6s_size lag7s_size lag8s_size lag9s_size lag10s_size lag11s_size
     lag1max_size lag2max_size lag3max_size lag4max_size lag5max_size lag6max_size lag7max_size lag8max_size lag9max_size lag10max_size lag11max_size
     lag1min_size lag2min_size lag3min_size lag4min_size lag5min_size lag6min_size lag7min_size lag8min_size lag9min_size lag10min_size lag11min_size;
run;

/** generate lags in simple counts **/

proc sort data=asie.simple_count6;
by mentionedticker year month;
run;

data asie.simple_count7_all; /** change to simple_count7_inter and simple_count7_in **/
set asie.simple_count6;
 lt=lag3(mentionedticker);

 lag3simple_count=lag3(simple_count);
 lag3simple_sum_size=lag3(simple_sum_size);
 lag3simple_mean_size=lag3(simple_mean_size);
 lag3simple_max_size=lag3(simple_max_size);
 lag3simple_min_size=lag3(simple_min_size);
 lag3simple_size_count=lag3(simple_size_count);
 
 if lt=mentionedticker then do;

 l3simple_count=lag3simple_count;
 l3simple_sum_size=lag3simple_sum_size;
 l3simple_mean_size=lag3simple_mean_size;
 l3simple_max_size=lag3simple_max_size;
 l3simple_min_size=lag3simple_min_size;
 l3simple_size_count=lag3simple_size_count;
 
end;
drop lag3simple_count lag3simple_sum_size lag3simple_mean_size lag3simple_max_size lag3simple_min_size lag3simple_size_count;
run;


/****************************************************/
/** CODE 4: Code_SimpleCount_ff48 **/
/****************************************************/

/** work on the base mention file **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count2;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
run;

/** add ff48 industry codes for the filing firmns and the mentioned firms **/

data asie.SEC_page_rank_ff48;
set asie.ind_sic2_ff48_ff17; /** this is a file with industry classifications **/
year=year(date);
month=month(date);
keep tic year month ff48;
run;

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.SEC_page_rank_ff48 as b
    on a.filingticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

data asie.simple_count2_b;
set asie.simple_count2_a;
f_ff48=ff48;
keep filingticker mentionedticker byear bmonth f_ff48;
run;

proc sql;
  create table asie.simple_count2_c as
  select a.*,b.*
    from asie.simple_count2_b as a left join asie.SEC_page_rank_ff48 as b
    on a.mentionedticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

data asie.simple_count2_d;
set asie.simple_count2_c;
m_ff48=ff48;
keep filingticker mentionedticker byear bmonth f_ff48 m_ff48;
run;

proc sort data=asie.simple_count2_d;
by filingticker byear bmonth mentionedticker;
run;

data asie.simple_count2_inter;
set asie.simple_count2_d;
if f_ff48 ne . and m_ff48 ne . and f_ff48 ne m_ff48;
proc print;
run;

data asie.simple_count2_in;
set asie.simple_count2_d;
if f_ff48 ne . and m_ff48 ne . and f_ff48=m_ff48;
proc print;
run;

/** count only the mentioned firms; all firms: 'simple_count2', outsdie-industry: 'simple_count2_inter', inside-industry: 'simple_count2_in'**/

proc sort data=asie.simple_count2;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

/** get mean and sum of firm size of the mentioning firms **/

data asie.main_file1;
set asie.firm_characteristics;
if year ge 1994;
keep tic year month l_size;
run;

proc sql;
  create table asie.simple_count2_size1 as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.main_file1 as b /** change to 'simple_count2_inter' and 'simple_count2_in' **/
    on a.filingticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

proc sort data=asie.simple_count2_size1 nodupkey;
by mentionedticker filingticker byear bmonth;
run;

proc sort data=asie.simple_count2_size1;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var l_size; 
output out=asie.simple_size3 n=size_count sum=sum_size max=max_size min=min_size;
proc print data=asie.simple_size3;
run;

proc sql;
  create table asie.simple_count3a as
  select a.*,b.*
    from asie.simple_count3 as a left join asie.simple_size3 as b
    on a.mentionedticker=b.mentionedticker 
    and a.byear=b.byear
	and a.bmonth=b.bmonth;
 run;

/*************************************/

data asie.mentioned_firms1;
set asie.simple_count3a;
keep mentionedticker;
run;

/** list of all mentioned firms **/

proc sort data=asie.mentioned_firms1 nodupkey;
by mentionedticker;
run;

/** add all months to all mentioned firms **/

proc print data=asie.months1;
run;

proc sql;
  create table asie.mentioned_firms2 as
  select a.*,b.*
    from asie.months1 as a, asie.mentioned_firms1 as b;
 run;

proc sort data=asie.mentioned_firms2;
by mentionedticker year month;
run;

/** merge the file with the mentiones to the file with all mentioned firms in all months **/

proc sql;
  create table asie.simple_count4 as
  select a.*,b.*
    from asie.mentioned_firms2 as a left join asie.simple_count3a as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear
	and a.month=b.bmonth;
 run;

data asie.simple_count5;
set asie.simple_count4;
if mention_count=. then mention_count=0;
if size_count=. then size_count=0;
if sum_size= . then sum_size=0;
/**if max_size =. then max_size=0;
if min_size =. then min_size=0;**/
run;

/** sum all mentions in the last 12 months **/

proc sort data=asie.simple_count5;
by mentionedticker year month;
run;

data asie.simple_count6;
set asie.simple_count5;
l11mt=lag11(mentionedticker);

lag1mc=lag(mention_count);
lag2mc=lag2(mention_count);
lag3mc=lag3(mention_count);
lag4mc=lag4(mention_count);
lag5mc=lag5(mention_count);
lag6mc=lag6(mention_count);
lag7mc=lag7(mention_count);
lag8mc=lag8(mention_count);
lag9mc=lag9(mention_count);
lag10mc=lag10(mention_count);
lag11mc=lag11(mention_count);

lag1sc=lag(size_count);
lag2sc=lag2(size_count);
lag3sc=lag3(size_count);
lag4sc=lag4(size_count);
lag5sc=lag5(size_count);
lag6sc=lag6(size_count);
lag7sc=lag7(size_count);
lag8sc=lag8(size_count);
lag9sc=lag9(size_count);
lag10sc=lag10(size_count);
lag11sc=lag11(size_count);

lag1s_size=lag(sum_size);
lag2s_size=lag2(sum_size);
lag3s_size=lag3(sum_size);
lag4s_size=lag4(sum_size);
lag5s_size=lag5(sum_size);
lag6s_size=lag6(sum_size);
lag7s_size=lag7(sum_size);
lag8s_size=lag8(sum_size);
lag9s_size=lag9(sum_size);
lag10s_size=lag10(sum_size);
lag11s_size=lag11(sum_size);

lag1max_size=lag(max_size);
lag2max_size=lag2(max_size);
lag3max_size=lag3(max_size);
lag4max_size=lag4(max_size);
lag5max_size=lag5(max_size);
lag6max_size=lag6(max_size);
lag7max_size=lag7(max_size);
lag8max_size=lag8(max_size);
lag9max_size=lag9(max_size);
lag10max_size=lag10(max_size);
lag11max_size=lag11(max_size);

lag1min_size=lag(min_size);
lag2min_size=lag2(min_size);
lag3min_size=lag3(min_size);
lag4min_size=lag4(min_size);
lag5min_size=lag5(min_size);
lag6min_size=lag6(min_size);
lag7min_size=lag7(min_size);
lag8min_size=lag8(min_size);
lag9min_size=lag9(min_size);
lag10min_size=lag10(min_size);
lag11min_size=lag11(min_size);

if l11mt=mentionedticker then do;
simple_count=mention_count+lag1mc+lag2mc+lag3mc+lag4mc+lag5mc+lag6mc+lag7mc+lag8mc+lag9mc+lag10mc+lag11mc;
simple_size_count=size_count+lag1sc+lag2sc+lag3sc+lag4sc+lag5sc+lag6sc+lag7sc+lag8sc+lag9sc+lag10sc+lag11sc;
simple_sum_size=sum_size+lag1s_size+lag2s_size+lag3s_size+lag4s_size+lag5s_size+lag6s_size+lag7s_size+lag8s_size+lag9s_size+lag10s_size+lag11s_size;
simple_mean_size=simple_sum_size/simple_size_count;
simple_max_size=max(max_size,lag1max_size,lag2max_size,lag3max_size,lag4max_size,lag5max_size,lag6max_size,lag7max_size,lag8max_size,lag9max_size,lag10max_size,lag11max_size);
simple_min_size=min(min_size,lag1min_size,lag2min_size,lag3min_size,lag4min_size,lag5min_size,lag6min_size,lag7min_size,lag8min_size,lag9min_size,lag10min_size,lag11min_size);
end;
drop byear bmonth _TYPE_ _FREQ_ l11mt lag1mc lag2mc lag3mc lag4mc lag5mc lag6mc lag7mc lag8mc lag9mc lag10mc lag11mc
     l11mt lag1sc lag2sc lag3sc lag4sc lag5sc lag6sc lag7sc lag8sc lag9sc lag10sc lag11sc
     l11mt lag1s_size lag2s_size lag3s_size lag4s_size lag5s_size lag6s_size lag7s_size lag8s_size lag9s_size lag10s_size lag11s_size
     lag1max_size lag2max_size lag3max_size lag4max_size lag5max_size lag6max_size lag7max_size lag8max_size lag9max_size lag10max_size lag11max_size
     lag1min_size lag2min_size lag3min_size lag4min_size lag5min_size lag6min_size lag7min_size lag8min_size lag9min_size lag10min_size lag11min_size;
run;

/** generate lags in simple counts **/

proc sort data=asie.simple_count6;
by mentionedticker year month;
run;

data asie.simple_count7; /** change to simple_count7_inter_ff48 and simple_count7_in_ff48 **/
set asie.simple_count6;
 lt=lag3(mentionedticker);

 lag3simple_count=lag3(simple_count);
 lag3simple_sum_size=lag3(simple_sum_size);
 lag3simple_mean_size=lag3(simple_mean_size);
 lag3simple_max_size=lag3(simple_max_size);
 lag3simple_min_size=lag3(simple_min_size);
 lag3simple_size_count=lag3(simple_size_count);
 
 if lt=mentionedticker then do;

 l3simple_count=lag3simple_count;
 l3simple_sum_size=lag3simple_sum_size;
 l3simple_mean_size=lag3simple_mean_size;
 l3simple_max_size=lag3simple_max_size;
 l3simple_min_size=lag3simple_min_size;
 l3simple_size_count=lag3simple_size_count;
 
end;
drop lag3simple_count lag3simple_sum_size lag3simple_mean_size lag3simple_max_size lag3simple_min_size lag3simple_size_count;
run;


/****************************************************/
/** CODE 5: Code_SimpleCount_hp **/
/****************************************************/

/** work on the base mention file **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count2;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
run;

/** leave only conenctions where both the filing and the mentioed firms have gvkey **/

data asie.permno_cusip_gvkey_tic_sic_ym;
set asie.permno_cusip_gvkey_tic_sic;
year_y=year(date);
month_y=month(date);
keep tic gvkey year_y month_y;
run;

data asie.gvkey_tic_sic_2_a;
set asie.permno_cusip_gvkey_tic_sic_ym;
tic_filing=tic;
gvkey_filing=gvkey;
keep year_y month_y tic_filing gvkey_filing;
run;

proc sql;
  create table asie.gvkey_tic_sic_2_b as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.gvkey_tic_sic_2_a as b
    on a.filingticker=b.tic_filing
    and a.byear=b.year_y
    and a.bmonth=b.month_y;
 run;

proc sort data=asie.gvkey_tic_sic_2_b nodupkey;
by filingticker mentionedticker byear bmonth;
run; 

data asie.gvkey_tic_sic_2_c;
set asie.permno_cusip_gvkey_tic_sic_ym;
tic_mentioned=tic;
gvkey_mentioned=gvkey;
keep year_y month_y tic_mentioned gvkey_mentioned;
run;

proc sql;
  create table asie.gvkey_tic_sic_2_d as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.gvkey_tic_sic_2_c as b
    on a.mentionedticker=b.tic_mentioned
    and a.byear=b.year_y
    and a.bmonth=b.month_y;
 run;

proc sort data=asie.gvkey_tic_sic_2_d nodupkey;
by filingticker mentionedticker byear bmonth;
run; 

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.gvkey_tic_sic_2_b as a left join asie.gvkey_tic_sic_2_d as b
    on a.filingticker=b.filingticker
    and a.mentionedticker=b.mentionedticker
    and a.byear=b.byear
    and a.bmonth=b.bmonth;
 run;

 data asie.simple_count22;
 set asie.simple_count2_a;
 if gvkey_filing ne .;
 if gvkey_mentioned ne .;
 run;


/** get the TNIC3 data from Hoberg and Phillips data library **/

proc import datafile='c:\sas files\tnic3_data.txt' out=asie.tnic3_data dbms=tab replace; 
   getnames=yes;
run;

/** get the ticker for the TNIC file **/

data asie.tnic3_data_a;
set asie.tnic3_data;
ngvkey1=gvkey1*1;
ngvkey2=gvkey2*1;
run;

data asie.permno_cusip_gvkey_tic_sic_y;
set asie.permno_cusip_gvkey_tic_sic;
year_y=year(date);
month_y=month(date);
ngvkey=gvkey*1;
if month_y=12;
keep tic ngvkey year_y;
run;

proc sql;
  create table asie.tnic3_data_b as
  select a.*,b.*
    from asie.tnic3_data_a as a left join asie.permno_cusip_gvkey_tic_sic_y as b 
    on a.ngvkey1=b.ngvkey
	and a.year=b.year_y;
 run; 

proc sort data=asie.tnic3_data_b nodupkey;
by year ngvkey1 ngvkey2;
run;

data asie.tnic3_data_c;
set asie.tnic3_data_b;
tic1=tic;
keep year ngvkey1 ngvkey2 tic1 score;
run;

proc sql;
  create table asie.tnic3_data_d as
  select a.*,b.*
    from asie.tnic3_data_c as a left join asie.permno_cusip_gvkey_tic_sic_y as b 
    on a.ngvkey2=b.ngvkey
	and a.year=b.year_y;
 run; 

proc sort data=asie.tnic3_data_d nodupkey;
by year ngvkey1 ngvkey2;
run;

data asie.tnic3_data_e;
set asie.tnic3_data_d;
tic2=tic;
if tic1 ne '';
if tic2 ne '';
keep year ngvkey1 ngvkey2 tic1 tic2 score;
run;

/** merge the files **/

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.simple_count22 as a left join asie.tnic3_data_e as b
    on a.filingticker=b.tic1
    and a.mentionedticker=b.tic2
    and a.byear=b.year;
 run;

proc sort data=asie.simple_count2_a nodupkey;
by filingticker mentionedticker byear bmonth;
run;

proc sort data=asie.simple_count2_a;
by filingticker byear bmonth mentionedticker;
run;

data asie.simple_count2_inter;
set asie.simple_count2_a;
if score = .;
proc print;
run;

data asie.simple_count2_in;
set asie.simple_count2_a;
if score ne .;
proc print;
run;

/** count only the mentioned firms; all firms: 'simple_count2', outsdie-industry: 'simple_count2_inter', inside-industry: 'simple_count2_in'**/

proc sort data=asie.simple_count2;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

/** get mean and sum of firm size of the mentioning firms **/

data asie.main_file1;
set asie.firm_characteristics;
if year ge 1994;
keep tic year month l_size;
run;

proc sql;
  create table asie.simple_count2_size1 as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.main_file1 as b /** change to 'simple_count2_inter' and 'simple_count2_in' **/
    on a.filingticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

proc sort data=asie.simple_count2_size1 nodupkey;
by mentionedticker filingticker byear bmonth;
run;

proc sort data=asie.simple_count2_size1;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var l_size; 
output out=asie.simple_size3 n=size_count sum=sum_size max=max_size min=min_size;
proc print data=asie.simple_size3;
run;

proc sql;
  create table asie.simple_count3a as
  select a.*,b.*
    from asie.simple_count3 as a left join asie.simple_size3 as b
    on a.mentionedticker=b.mentionedticker 
    and a.byear=b.byear
	and a.bmonth=b.bmonth;
 run;

/*************************************/

data asie.mentioned_firms1;
set asie.simple_count3a;
keep mentionedticker;
run;

/** list of all mentioned firms **/

proc sort data=asie.mentioned_firms1 nodupkey;
by mentionedticker;
run;

/** add all months to all mentioned firms **/

proc print data=asie.months1;
run;

proc sql;
  create table asie.mentioned_firms2 as
  select a.*,b.*
    from asie.months1 as a, asie.mentioned_firms1 as b;
 run;

proc sort data=asie.mentioned_firms2;
by mentionedticker year month;
run;

/** merge the file with the mentiones to the file with all mentioned firms in all months **/

proc sql;
  create table asie.simple_count4 as
  select a.*,b.*
    from asie.mentioned_firms2 as a left join asie.simple_count3a as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear
	and a.month=b.bmonth;
 run;

data asie.simple_count5;
set asie.simple_count4;
if mention_count=. then mention_count=0;
if size_count=. then size_count=0;
if sum_size= . then sum_size=0;
/**if max_size =. then max_size=0;
if min_size =. then min_size=0;**/
run;

/** sum all mentions in the last 12 months **/

proc sort data=asie.simple_count5;
by mentionedticker year month;
run;

data asie.simple_count6;
set asie.simple_count5;
l11mt=lag11(mentionedticker);

lag1mc=lag(mention_count);
lag2mc=lag2(mention_count);
lag3mc=lag3(mention_count);
lag4mc=lag4(mention_count);
lag5mc=lag5(mention_count);
lag6mc=lag6(mention_count);
lag7mc=lag7(mention_count);
lag8mc=lag8(mention_count);
lag9mc=lag9(mention_count);
lag10mc=lag10(mention_count);
lag11mc=lag11(mention_count);

lag1sc=lag(size_count);
lag2sc=lag2(size_count);
lag3sc=lag3(size_count);
lag4sc=lag4(size_count);
lag5sc=lag5(size_count);
lag6sc=lag6(size_count);
lag7sc=lag7(size_count);
lag8sc=lag8(size_count);
lag9sc=lag9(size_count);
lag10sc=lag10(size_count);
lag11sc=lag11(size_count);

lag1s_size=lag(sum_size);
lag2s_size=lag2(sum_size);
lag3s_size=lag3(sum_size);
lag4s_size=lag4(sum_size);
lag5s_size=lag5(sum_size);
lag6s_size=lag6(sum_size);
lag7s_size=lag7(sum_size);
lag8s_size=lag8(sum_size);
lag9s_size=lag9(sum_size);
lag10s_size=lag10(sum_size);
lag11s_size=lag11(sum_size);

lag1max_size=lag(max_size);
lag2max_size=lag2(max_size);
lag3max_size=lag3(max_size);
lag4max_size=lag4(max_size);
lag5max_size=lag5(max_size);
lag6max_size=lag6(max_size);
lag7max_size=lag7(max_size);
lag8max_size=lag8(max_size);
lag9max_size=lag9(max_size);
lag10max_size=lag10(max_size);
lag11max_size=lag11(max_size);

lag1min_size=lag(min_size);
lag2min_size=lag2(min_size);
lag3min_size=lag3(min_size);
lag4min_size=lag4(min_size);
lag5min_size=lag5(min_size);
lag6min_size=lag6(min_size);
lag7min_size=lag7(min_size);
lag8min_size=lag8(min_size);
lag9min_size=lag9(min_size);
lag10min_size=lag10(min_size);
lag11min_size=lag11(min_size);

if l11mt=mentionedticker then do;
simple_count=mention_count+lag1mc+lag2mc+lag3mc+lag4mc+lag5mc+lag6mc+lag7mc+lag8mc+lag9mc+lag10mc+lag11mc;
simple_size_count=size_count+lag1sc+lag2sc+lag3sc+lag4sc+lag5sc+lag6sc+lag7sc+lag8sc+lag9sc+lag10sc+lag11sc;
simple_sum_size=sum_size+lag1s_size+lag2s_size+lag3s_size+lag4s_size+lag5s_size+lag6s_size+lag7s_size+lag8s_size+lag9s_size+lag10s_size+lag11s_size;
simple_mean_size=simple_sum_size/simple_size_count;
simple_max_size=max(max_size,lag1max_size,lag2max_size,lag3max_size,lag4max_size,lag5max_size,lag6max_size,lag7max_size,lag8max_size,lag9max_size,lag10max_size,lag11max_size);
simple_min_size=min(min_size,lag1min_size,lag2min_size,lag3min_size,lag4min_size,lag5min_size,lag6min_size,lag7min_size,lag8min_size,lag9min_size,lag10min_size,lag11min_size);
end;
drop byear bmonth _TYPE_ _FREQ_ l11mt lag1mc lag2mc lag3mc lag4mc lag5mc lag6mc lag7mc lag8mc lag9mc lag10mc lag11mc
     l11mt lag1sc lag2sc lag3sc lag4sc lag5sc lag6sc lag7sc lag8sc lag9sc lag10sc lag11sc
     l11mt lag1s_size lag2s_size lag3s_size lag4s_size lag5s_size lag6s_size lag7s_size lag8s_size lag9s_size lag10s_size lag11s_size
     lag1max_size lag2max_size lag3max_size lag4max_size lag5max_size lag6max_size lag7max_size lag8max_size lag9max_size lag10max_size lag11max_size
     lag1min_size lag2min_size lag3min_size lag4min_size lag5min_size lag6min_size lag7min_size lag8min_size lag9min_size lag10min_size lag11min_size;
run;

/** generate lags in simple counts **/

proc sort data=asie.simple_count6;
by mentionedticker year month;
run;

data asie.simple_count7; /** change to simple_count7_inter_hp and simple_count7_in_hp **/
set asie.simple_count6;
 lt=lag3(mentionedticker);

 lag3simple_count=lag3(simple_count);
 lag3simple_sum_size=lag3(simple_sum_size);
 lag3simple_mean_size=lag3(simple_mean_size);
 lag3simple_max_size=lag3(simple_max_size);
 lag3simple_min_size=lag3(simple_min_size);
 lag3simple_size_count=lag3(simple_size_count);
 
 if lt=mentionedticker then do;

 l3simple_count=lag3simple_count;
 l3simple_sum_size=lag3simple_sum_size;
 l3simple_mean_size=lag3simple_mean_size;
 l3simple_max_size=lag3simple_max_size;
 l3simple_min_size=lag3simple_min_size;
 l3simple_size_count=lag3simple_size_count;
 
end;
drop lag3simple_count lag3simple_sum_size lag3simple_mean_size lag3simple_max_size lag3simple_min_size lag3simple_size_count;
run;


/****************************************************/
/** CODE 6: fundamentals_annual4 **/
/****************************************************/

%let wrds = wrds-cloud.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

/** MERGING CRSP AND COMPUSTAT **/

/************************************************************************************
* STEP ONE: Extract Compustat data; 
************************************************************************************/
rsubmit;
data compx2;
   set comp.funda (keep = gvkey tic fyear fyr datadate SALE COGS AT INDFMT DATAFMT POPSRC CONSOL at lt csho prcc_f CEQ DLC DLTT DD1 DD2 DD3 DD4 DD5 
                          OIBDP IB XINT TXT SPI WCAP ACT LCT FOPT RE LT SALE XSGA DV NI DM DCVT SICH/**SPLTICRM SIC**/ che txp dp AJEX XRD
                          MIB PSTK PPEGT INVT TXDI); 
   if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
   /** create begin and end dates of fiscal year**/
   format endfyr begfyr date9.;
   endfyr= datadate;
   begfyr= intnx('month',endfyr,-11,'beg');  /* intnx(interval, from, n, 'aligment') */
run;

proc sort; by gvkey endfyr; 
run;

/*** step 1a***
merge with the current SIC codes
***/
rsubmit;
proc sql;
  create table compx3 as select *, b.sic, b.conm
  from compx2 as a, comp.names as b
  where a.gvkey = b.gvkey and
  year(a.datadate) le b.year2 and  year(a.datadate) ge b.year1;                
quit;

/*** step 1b***
add credit rating
***/
rsubmit;
proc sql;
create table compx4 as select a.*, b.splticrm as br
from compx3 a left join comp.adsprate b
on (a.gvkey = b.gvkey) and
(a.datadate = b.datadate);
quit; 

/*******************************************************************************************
* STEP TWO: Link GVKEYS to CRSP Identifiers;                                               *
* Use CCMXPF_LINKTABLE table to obtain CRSP identifiers for our subset of companies/dates; *
********************************************************************************************/

rsubmit;
data ccmxpf_linktable1;
set crsp.ccmxpf_linktable;;
gvkey1=gvkey*1;
run;

data compx5;
set compx4;
gvkey1=gvkey*1;
run;

rsubmit;
proc sql;
  create table mydata as select *
  from compx5 as a, ccmxpf_linktable1 as b
  where a.gvkey1 = b.gvkey1 and
  b.LINKTYPE in ("LU","LC","LD","LN","LS","LX") and
  b.usedflag=1 and (b.LINKDT <= a.endfyr or b.LINKDT = .B) and (a.endfyr <= b.LINKENDDT or b.LINKENDDT = .E);                
quit;

/***************************************************************************************
* STEP THREE: Add CRSP Monthly price data;
***************************************************************************************/

rsubmit;
proc sql;
    create table mydata2 as select *
    from mydata as a, crsp.msf as b
    where a.lpermno = b.permno 
    and	intck('month',a.endfyr,b.date) between 6 and 17;
quit;

/***************************************************************************************
* STEP FOUR: Creating variables;
***************************************************************************************/
rsubmit;
proc sort data=mydata2; by permno date;
run;

data file1;
set mydata2;
size=abs(prc)*shrout;
mtb=(size/1000)/ceq;
ebit=IB+XINT+TXT+SPI;
Payout=DV/ebit;
run;

data file2;
set file1;
/**keep Gvkey1 Tic PERMNO CUSIP Fyear Fyr Begfyr Endfyr DATE 
     AT CEQ Dlc Dltt DD1 DD2 DD3 DD4 DD5 HEXCD Size PRC RET RET1 MTB EBIT Payout br SICH sic;**/
run;

***************************************************************
* Data cleaning
***************************************************************;
rsubmit;
proc sql;
create table mydata6 as select a.*, b.HSHRCD
from file2 as a left join crsp.msfhdr as b
on a.permno = b.permno;
quit;


data mydata7; set mydata6; 
if (HSHRCD > 14) or  (HSHRCD < 10) then delete; 
run;  

***************************************************************
* Craeting lag size and btm
***************************************************************;

rsubmit;
proc sort data=mydata7;
by permno date;
run;

rsubmit;
data mydata7a;
set mydata7;
lp=lag(permno);
lagsize=lag(size);
lagmtb=lag(mtb);
if lp=permno then do;
lsize=lagsize;
lmtb=lagmtb;
end;
drop lp lagsize lagmtb;
run;

***************************************************************
* Craeting the annual fundamentels
***************************************************************;

rsubmit;
data mydata8;
set mydata7a;
year=year(date);
month=month(date);
if OIBDP<0 then OIBDP=.;
if NI<0 then NI=.;
run;

rsubmit;
proc sort data=mydata8;
by permno date;
run;

rsubmit;
data mydata9;
set mydata8;
lmonth=lag12(month);
lp=lag12(permno);
lagAT=lag12(AT);
if lp=permno and lmonth=month then do;
l_AT=lagAT;
end;
drop lmonth lp lagAT;
run;

rsubmit;
data mydata10;
set mydata9;
SALE_lAT=SALE/l_AT;
OIBDP_lAT=OIBDP/l_AT;
NI_lAT=NI/l_AT;
run;

rsubmit;
proc sort data=mydata10;
by permno date;
run;

rsubmit;
data ann_fund2;
set mydata10;
lmonth=lag12(month);
lp=lag12(permno);
lagsize=lag12(size);
lagAT=lag12(AT);
lagSALE=lag12(SALE);
lagOIBDP=lag12(OIBDP);
lagNI=lag12(IB);
lagSALE_lAT=lag12(SALE_lAT);
lagOIBDP_lAT=lag12(OIBDP_lAT);
lagNI_lAT=lag12(NI_lAT);
if lp=permno and lmonth=month then do;
l_size=lagsize;
l_AT=lagAT;
l_SALE=lagSALE;
l_OIBDP=lagOIBDP;
l_NI=lagNI;
l_SALE_lAT=lagSALE_lAT;
l_OIBDP_lAT=lagOIBDP_lAT;
l_NI_lAT=lagNI_lAT;
end;
drop lmonth lp lagsize lagAT lagSALE lagOIBDP lagNI lagSALE_lAT lagOIBDP_lAT lagNI_lAT;
run;
rsubmit;
proc sort data=ann_fund2;
by permno descending date;
run;
rsubmit;
data ann_fund3;
set ann_fund2;
lmonth=lag12(month);
lp=lag12(permno);
lagsize=lag12(size);
lagAT=lag12(AT);
lagSALE=lag12(SALE);
lagOIBDP=lag12(OIBDP);
lagNI=lag12(NI);
lagSALE_lAT=lag12(SALE_lAT);
lagOIBDP_lAT=lag12(OIBDP_lAT);
lagNI_lAT=lag12(NI_lAT);
if lp=permno and lmonth=month then do;
f_size=lagsize;
f_AT=lagAT;
f_SALE=lagSALE;
f_OIBDP=lagOIBDP;
f_NI=lagNI;
f_SALE_lAT=lagSALE_lAT;
f_OIBDP_lAT=lagOIBDP_lAT;
f_NI_lAT=lagNI_lAT;
end;
drop lmonth lp lagsize lagAT lagSALE lagOIBDP lagNI lagSALE_lAT lagOIBDP_lAT lagNI_lAT;
run;

proc sort data=ann_fund3;
by permno descending date;
run;

data ann_fund4;
set ann_fund3;
lmonth=lag24(month);
lp=lag24(permno);
lagsize=lag24(size);
lagAT=lag24(AT);
lagSALE=lag24(SALE);
lagOIBDP=lag24(OIBDP);
lagNI=lag24(NI);
lagSALE_lAT=lag24(SALE_lAT);
lagOIBDP_lAT=lag24(OIBDP_lAT);
lagNI_lAT=lag24(NI_lAT);
if lp=permno and lmonth=month then do;
f2_size=lagsize;
f2_AT=lagAT;
f2_SALE=lagSALE;
f2_OIBDP=lagOIBDP;
f2_NI=lagNI;
f2_SALE_lAT=lagSALE_lAT;
f2_OIBDP_lAT=lagOIBDP_lAT;
f2_NI_lAT=lagNI_lAT;
end;
drop lmonth lp lagsize lagAT lagSALE lagOIBDP lagNI lagSALE_lAT lagOIBDP_lAT lagNI_lAT;
run;

proc sort data=ann_fund4;
by permno descending date;
run;

data ann_fund5;
set ann_fund4;
lmonth=lag36(month);
lp=lag36(permno);
lagsize=lag36(size);
lagAT=lag36(AT);
lagSALE=lag36(SALE);
lagOIBDP=lag36(OIBDP);
lagNI=lag36(NI);
lagSALE_lAT=lag36(SALE_lAT);
lagOIBDP_lAT=lag36(OIBDP_lAT);
lagNI_lAT=lag36(NI_lAT);
if lp=permno and lmonth=month then do;
f3_size=lagsize;
f3_AT=lagAT;
f3_SALE=lagSALE;
f3_OIBDP=lagOIBDP;
f3_NI=lagNI;
f3_SALE_lAT=lagSALE_lAT;
f3_OIBDP_lAT=lagOIBDP_lAT;
f3_NI_lAT=lagNI_lAT;
end;
drop lmonth lp lagsize lagAT lagSALE lagOIBDP lagNI lagSALE_lAT lagOIBDP_lAT lagNI_lAT;
run;

proc sort data=ann_fund5;
by permno date;
run;

data ann_fund6;
set ann_fund5;

SaleGrowth_0_l1=SALE/l_SALE-1;
OIBDPGrowth_0_l1=OIBDP/l_OIBDP-1;
SALE_lATGrowth_0_l1=SALE_lAT/l_SALE_lAT-1;
OIBDP_lATGrowth_0_l1=OIBDP_lAT/l_OIBDP_lAT-1;

SaleGrowth_1_l1=f_SALE/l_SALE-1;
OIBDPGrowth_1_l1=f_OIBDP/l_OIBDP-1;
SALE_lATGrowth_1_l1=f_SALE_lAT/l_SALE_lAT-1;
OIBDP_lATGrowth_1_l1=f_OIBDP_lAT/l_OIBDP_lAT-1;

SaleGrowth_2_l1=f2_SALE/l_SALE-1;
OIBDPGrowth_2_l1=f2_OIBDP/l_OIBDP-1;
SALE_lATGrowth_2_l1=f2_SALE_lAT/l_SALE_lAT-1;
OIBDP_lATGrowth_2_l1=f2_OIBDP_lAT/l_OIBDP_lAT-1;

SaleGrowth_3_l1=f3_SALE/l_SALE-1;
OIBDPGrowth_3_l1=f3_OIBDP/l_OIBDP-1;
SALE_lATGrowth_3_l1=f3_SALE_lAT/l_SALE_lAT-1;
OIBDP_lATGrowth_3_l1=f3_OIBDP_lAT/l_OIBDP_lAT-1;

SaleGrowth_1_0=f_SALE/SALE-1;
OIBDPGrowth_1_0=f_OIBDP/OIBDP-1;
SALE_lATGrowth_1_0=f_SALE_lAT/SALE_lAT-1;
OIBDP_lATGrowth_1_0=f_OIBDP_lAT/OIBDP_lAT-1;

SaleGrowth_2_0=f2_SALE/SALE-1;
OIBDPGrowth_2_0=f2_OIBDP/OIBDP-1;
SALE_lATGrowth_2_0=f2_SALE_lAT/SALE_lAT-1;
OIBDP_lATGrowth_2_0=f2_OIBDP_lAT/OIBDP_lAT-1;

SaleGrowth_3_0=f3_SALE/SALE-1;
OIBDPGrowth_3_0=f3_OIBDP/OIBDP-1;
SALE_lATGrowth_3_0=f3_SALE_lAT/SALE_lAT-1;
OIBDP_lATGrowth_3_0=f3_OIBDP_lAT/OIBDP_lAT-1;

run;

rsubmit;
data ann_fund7;
set ann_fund6;
year=year(date);
month=month(date);
if year ge 1995 and year le 2017;
keep permno tic date year month fyear datadate sic sich lsize lmtb prc

l_size l_AT l_SALE l_OIBDP l_SALE_lAT l_OIBDP_lAT 
size AT SALE OIBDP SALE_lAT OIBDP_lAT 
f_size f_AT f_SALE f_OIBDP f_SALE_lAT f_OIBDP_lAT 
f2_size f2_AT f2_SALE f2_OIBDP f2_SALE_lAT f2_OIBDP_lAT 
f3_size f3_AT f3_SALE f3_OIBDP f3_SALE_lAT f3_OIBDP_lAT 

SaleGrowth_0_l1 SaleGrowth_1_l1 SaleGrowth_2_l1 SaleGrowth_3_l1 SaleGrowth_1_0 SaleGrowth_2_0 SaleGrowth_3_0
OIBDPGrowth_0_l1 OIBDPGrowth_1_l1 OIBDPGrowth_2_l1 OIBDPGrowth_3_l1 OIBDPGrowth_1_0 OIBDPGrowth_2_0 OIBDPGrowth_3_0
SALE_lATGrowth_0_l1 SALE_lATGrowth_1_l1 SALE_lATGrowth_2_l1 SALE_lATGrowth_3_l1 SALE_lATGrowth_1_0 SALE_lATGrowth_2_0 SALE_lATGrowth_3_0
OIBDP_lATGrowth_0_l1 OIBDP_lATGrowth_1_l1 OIBDP_lATGrowth_2_l1 OIBDP_lATGrowth_3_l1 OIBDP_lATGrowth_1_0 OIBDP_lATGrowth_2_0 OIBDP_lATGrowth_3_0;

run;

rsubmit;
proc download data=ann_fund7 out=asie.ann_fund7_new3;
run;


***************************************************************
* Convert to the FF48 industry classifications
***************************************************************;

data asie.ann_fund8;
	set asie.ann_fund7_new3;
	
	if ( sic ge 0100 and sic le 0199) or ( sic ge 0200 and sic le 0299) or ( sic ge 0700 and sic le 0799) or ( sic ge 0910 and sic le 0919) or ( sic ge 2048 and sic le 2048) then ff48= 1;
	if ( sic ge 2000 and sic le 2009) or ( sic ge 2010 and sic le 2019) or ( sic ge 2020 and sic le 2029) or ( sic ge 2030 and sic le 2039) or ( sic ge 2040 and sic le 2046) or ( sic ge 2050 and sic le 2059) or ( sic ge 2060 and sic le 2063) or ( sic ge 2070 and sic le 2079) or ( sic ge 2090 and sic le 2092) or ( sic ge 2095 and sic le 2095) or ( sic ge 2098 and sic le 2099) then ff48= 2;
	if ( sic ge 2064 and sic le 2068) or ( sic ge 2086 and sic le 2086) or ( sic ge 2087 and sic le 2087) or ( sic ge 2096 and sic le 2096) or ( sic ge 2097 and sic le 2097) then ff48= 3;
	if ( sic ge 2080 and sic le 2080) or ( sic ge 2082 and sic le 2082) or ( sic ge 2083 and sic le 2083) or ( sic ge 2084 and sic le 2084) or ( sic ge 2085 and sic le 2085) then ff48= 4;
	if ( sic ge 2100 and sic le 2199) then ff48= 5;
	if ( sic ge 0920 and sic le 0999) or ( sic ge 3650 and sic le 3651) or ( sic ge 3652 and sic le 3652) or ( sic ge 3732 and sic le 3732) or ( sic ge 3930 and sic le 3931) or ( sic ge 3940 and sic le 3949) then ff48= 6;
	if ( sic ge 7800 and sic le 7829) or ( sic ge 7830 and sic le 7833) or ( sic ge 7840 and sic le 7841) or ( sic ge 7900 and sic le 7900) or ( sic ge 7910 and sic le 7911) or ( sic ge 7920 and sic le 7929) or ( sic ge 7930 and sic le 7933) or ( sic ge 7940 and sic le 7949) or ( sic ge 7980 and sic le 7980) or ( sic ge 7990 and sic le 7999) then ff48= 7;
	if ( sic ge 2700 and sic le 2709) or ( sic ge 2710 and sic le 2719) or ( sic ge 2720 and sic le 2729) or ( sic ge 2730 and sic le 2739) or ( sic ge 2740 and sic le 2749) or ( sic ge 2770 and sic le 2771) or ( sic ge 2780 and sic le 2789) or ( sic ge 2790 and sic le 2799) then ff48= 8;
	if ( sic ge 2047 and sic le 2047) or ( sic ge 2391 and sic le 2392) or ( sic ge 2510 and sic le 2519) or ( sic ge 2590 and sic le 2599) or ( sic ge 2840 and sic le 2843) or ( sic ge 2844 and sic le 2844) or ( sic ge 3160 and sic le 3161) or ( sic ge 3170 and sic le 3171) or ( sic ge 3172 and sic le 3172) or ( sic ge 3190 and sic le 3199) or ( sic ge 3229 and sic le 3229) or ( sic ge 3260 and sic le 3260) or ( sic ge 3262 and sic le 3263) or ( sic ge 3269 and sic le 3269) or ( sic ge 3230 and sic le 3231) or ( sic ge 3630 and sic le 3639) or ( sic ge 3750 and sic le 3751) or ( sic ge 3800 and sic le 3800) or ( sic ge 3860 and sic le 3861) or ( sic ge 3870 and sic le 3873) or ( sic ge 3910 and sic le 3911) or ( sic ge 3914 and sic le 3914) or ( sic ge 3915 and sic le 3915) or ( sic ge 3960 and sic le 3962) or ( sic ge 3991 and sic le 3991) or ( sic ge 3995 and sic le 3995) then ff48= 9;
	if ( sic ge 2300 and sic le 2390) or ( sic ge 3020 and sic le 3021) or ( sic ge 3100 and sic le 3111) or ( sic ge 3130 and sic le 3131) or ( sic ge 3140 and sic le 3149) or ( sic ge 3150 and sic le 3151) or ( sic ge 3963 and sic le 3965) then ff48=10;
	if ( sic ge 8000 and sic le 8099) then ff48=11;
	if ( sic ge 3693 and sic le 3693) or ( sic ge 3840 and sic le 3849) or ( sic ge 3850 and sic le 3851) then ff48=12;
	if ( sic ge 2830 and sic le 2830) or ( sic ge 2831 and sic le 2831) or ( sic ge 2833 and sic le 2833) or ( sic ge 2834 and sic le 2834) or ( sic ge 2835 and sic le 2835) or ( sic ge 2836 and sic le 2836) then ff48=13;
	if ( sic ge 2800 and sic le 2809) or ( sic ge 2810 and sic le 2819) or ( sic ge 2820 and sic le 2829) or ( sic ge 2850 and sic le 2859) or ( sic ge 2860 and sic le 2869) or ( sic ge 2870 and sic le 2879) or ( sic ge 2890 and sic le 2899) then ff48=14;
	if ( sic ge 3031 and sic le 3031) or ( sic ge 3041 and sic le 3041) or ( sic ge 3050 and sic le 3053) or ( sic ge 3060 and sic le 3069) or ( sic ge 3070 and sic le 3079) or ( sic ge 3080 and sic le 3089) or ( sic ge 3090 and sic le 3099) then ff48=15;
	if ( sic ge 2200 and sic le 2269) or ( sic ge 2270 and sic le 2279) or ( sic ge 2280 and sic le 2284) or ( sic ge 2290 and sic le 2295) or ( sic ge 2297 and sic le 2297) or ( sic ge 2298 and sic le 2298) or ( sic ge 2299 and sic le 2299) or ( sic ge 2393 and sic le 2395) or ( sic ge 2397 and sic le 2399) then ff48=16;
	if ( sic ge 0800 and sic le 0899) or ( sic ge 2400 and sic le 2439) or ( sic ge 2450 and sic le 2459) or ( sic ge 2490 and sic le 2499) or ( sic ge 2660 and sic le 2661) or ( sic ge 2950 and sic le 2952) or ( sic ge 3200 and sic le 3200) or ( sic ge 3210 and sic le 3211) or ( sic ge 3240 and sic le 3241) or ( sic ge 3250 and sic le 3259) or ( sic ge 3261 and sic le 3261) or ( sic ge 3264 and sic le 3264) or ( sic ge 3270 and sic le 3275) or ( sic ge 3280 and sic le 3281) or ( sic ge 3290 and sic le 3293) or ( sic ge 3295 and sic le 3299) or ( sic ge 3420 and sic le 3429) or ( sic ge 3430 and sic le 3433) or ( sic ge 3440 and sic le 3441) or ( sic ge 3442 and sic le 3442) or ( sic ge 3446 and sic le 3446) or ( sic ge 3448 and sic le 3448) or ( sic ge 3449 and sic le 3449) or ( sic ge 3450 and sic le 3451) or ( sic ge 3452 and sic le 3452) or ( sic ge 3490 and sic le 3499) or ( sic ge 3996 and sic le 3996) then ff48=17;
	if ( sic ge 1500 and sic le 1511) or ( sic ge 1520 and sic le 1529) or ( sic ge 1530 and sic le 1539) or ( sic ge 1540 and sic le 1549) or ( sic ge 1600 and sic le 1699) or ( sic ge 1700 and sic le 1799) then ff48=18;
	if ( sic ge 3300 and sic le 3300) or ( sic ge 3310 and sic le 3317) or ( sic ge 3320 and sic le 3325) or ( sic ge 3330 and sic le 3339) or ( sic ge 3340 and sic le 3341) or ( sic ge 3350 and sic le 3357) or ( sic ge 3360 and sic le 3369) or ( sic ge 3370 and sic le 3379) or ( sic ge 3390 and sic le 3399) then ff48=19;
	if ( sic ge 3400 and sic le 3400) or ( sic ge 3443 and sic le 3443) or ( sic ge 3444 and sic le 3444) or ( sic ge 3460 and sic le 3469) or ( sic ge 3470 and sic le 3479) then ff48=20;
	if ( sic ge 3510 and sic le 3519) or ( sic ge 3520 and sic le 3529) or ( sic ge 3530 and sic le 3530) or ( sic ge 3531 and sic le 3531) or ( sic ge 3532 and sic le 3532) or ( sic ge 3533 and sic le 3533) or ( sic ge 3534 and sic le 3534) or ( sic ge 3535 and sic le 3535) or ( sic ge 3536 and sic le 3536) or ( sic ge 3538 and sic le 3538) or ( sic ge 3540 and sic le 3549) or ( sic ge 3550 and sic le 3559) or ( sic ge 3560 and sic le 3569) or ( sic ge 3580 and sic le 3580) or ( sic ge 3581 and sic le 3581) or ( sic ge 3582 and sic le 3582) or ( sic ge 3585 and sic le 3585) or ( sic ge 3586 and sic le 3586) or ( sic ge 3589 and sic le 3589) or ( sic ge 3590 and sic le 3599) then ff48=21;
	if ( sic ge 3600 and sic le 3600) or ( sic ge 3610 and sic le 3613) or ( sic ge 3620 and sic le 3621) or ( sic ge 3623 and sic le 3629) or ( sic ge 3640 and sic le 3644) or ( sic ge 3645 and sic le 3645) or ( sic ge 3646 and sic le 3646) or ( sic ge 3648 and sic le 3649) or ( sic ge 3660 and sic le 3660) or ( sic ge 3690 and sic le 3690) or ( sic ge 3691 and sic le 3692) or ( sic ge 3699 and sic le 3699) then ff48=22;
	if ( sic ge 2296 and sic le 2296) or ( sic ge 2396 and sic le 2396) or ( sic ge 3010 and sic le 3011) or ( sic ge 3537 and sic le 3537) or ( sic ge 3647 and sic le 3647) or ( sic ge 3694 and sic le 3694) or ( sic ge 3700 and sic le 3700) or ( sic ge 3710 and sic le 3710) or ( sic ge 3711 and sic le 3711) or ( sic ge 3713 and sic le 3713) or ( sic ge 3714 and sic le 3714) or ( sic ge 3715 and sic le 3715) or ( sic ge 3716 and sic le 3716) or ( sic ge 3792 and sic le 3792) or ( sic ge 3790 and sic le 3791) or ( sic ge 3799 and sic le 3799) then ff48=23;
	if ( sic ge 3720 and sic le 3720) or ( sic ge 3721 and sic le 3721) or ( sic ge 3723 and sic le 3724) or ( sic ge 3725 and sic le 3725) or ( sic ge 3728 and sic le 3729) then ff48=24;
	if ( sic ge 3730 and sic le 3731) or ( sic ge 3740 and sic le 3743) then ff48=25;
	if ( sic ge 3760 and sic le 3769) or ( sic ge 3795 and sic le 3795) or ( sic ge 3480 and sic le 3489) then ff48=26;
	if ( sic ge 1040 and sic le 1049) then ff48=27;
	if ( sic ge 1000 and sic le 1009) or ( sic ge 1010 and sic le 1019) or ( sic ge 1020 and sic le 1029) or ( sic ge 1030 and sic le 1039) or ( sic ge 1050 and sic le 1059) or ( sic ge 1060 and sic le 1069) or ( sic ge 1070 and sic le 1079) or ( sic ge 1080 and sic le 1089) or ( sic ge 1090 and sic le 1099) or ( sic ge 1100 and sic le 1119) or ( sic ge 1400 and sic le 1499) then ff48=28;
	if ( sic ge 1200 and sic le 1299) then ff48=29;
	if ( sic ge 1300 and sic le 1300) or ( sic ge 1310 and sic le 1319) or ( sic ge 1320 and sic le 1329) or ( sic ge 1330 and sic le 1339) or ( sic ge 1370 and sic le 1379) or ( sic ge 1380 and sic le 1380) or ( sic ge 1381 and sic le 1381) or ( sic ge 1382 and sic le 1382) or ( sic ge 1389 and sic le 1389) or ( sic ge 2900 and sic le 2912) or ( sic ge 2990 and sic le 2999) then ff48=30;
	if ( sic ge 4900 and sic le 4900) or ( sic ge 4910 and sic le 4911) or ( sic ge 4920 and sic le 4922) or ( sic ge 4923 and sic le 4923) or ( sic ge 4924 and sic le 4925) or ( sic ge 4930 and sic le 4931) or ( sic ge 4932 and sic le 4932) or ( sic ge 4939 and sic le 4939) or ( sic ge 4940 and sic le 4942) then ff48=31;
	if ( sic ge 4800 and sic le 4800) or ( sic ge 4810 and sic le 4813) or ( sic ge 4820 and sic le 4822) or ( sic ge 4830 and sic le 4839) or ( sic ge 4840 and sic le 4841) or ( sic ge 4880 and sic le 4889) or ( sic ge 4890 and sic le 4890) or ( sic ge 4891 and sic le 4891) or ( sic ge 4892 and sic le 4892) or ( sic ge 4899 and sic le 4899) then ff48=32;
	if ( sic ge 7020 and sic le 7021) or ( sic ge 7030 and sic le 7033) or ( sic ge 7200 and sic le 7200) or ( sic ge 7210 and sic le 7212) or ( sic ge 7214 and sic le 7214) or ( sic ge 7215 and sic le 7216) or ( sic ge 7217 and sic le 7217) or ( sic ge 7219 and sic le 7219) or ( sic ge 7220 and sic le 7221) or ( sic ge 7230 and sic le 7231) or ( sic ge 7240 and sic le 7241) or ( sic ge 7250 and sic le 7251) or ( sic ge 7260 and sic le 7269) or ( sic ge 7270 and sic le 7290) or ( sic ge 7291 and sic le 7291) or ( sic ge 7292 and sic le 7299) or ( sic ge 7395 and sic le 7395) or ( sic ge 7500 and sic le 7500) or ( sic ge 7520 and sic le 7529) or ( sic ge 7530 and sic le 7539) or ( sic ge 7540 and sic le 7549) or ( sic ge 7600 and sic le 7600) or ( sic ge 7620 and sic le 7620) or ( sic ge 7622 and sic le 7622) or ( sic ge 7623 and sic le 7623) or ( sic ge 7629 and sic le 7629) or ( sic ge 7630 and sic le 7631) or ( sic ge 7640 and sic le 7641) or ( sic ge 7690 and sic le 7699) or ( sic ge 8100 and sic le 8199) or ( sic ge 8200 and sic le 8299) or ( sic ge 8300 and sic le 8399) or ( sic ge 8400 and sic le 8499) or ( sic ge 8600 and sic le 8699) or ( sic ge 8800 and sic le 8899) or ( sic ge 7510 and sic le 7515) then ff48=33;
	if ( sic ge 2750 and sic le 2759) or ( sic ge 3993 and sic le 3993) or ( sic ge 7218 and sic le 7218) or ( sic ge 7300 and sic le 7300) or ( sic ge 7310 and sic le 7319) or ( sic ge 7320 and sic le 7329) or ( sic ge 7330 and sic le 7339) or ( sic ge 7340 and sic le 7342) or ( sic ge 7349 and sic le 7349) or ( sic ge 7350 and sic le 7351) or ( sic ge 7352 and sic le 7352) or ( sic ge 7353 and sic le 7353) or ( sic ge 7359 and sic le 7359) or ( sic ge 7360 and sic le 7369) or ( sic ge 7370 and sic le 7372) or ( sic ge 7374 and sic le 7374) or ( sic ge 7375 and sic le 7375) or ( sic ge 7376 and sic le 7376) or ( sic ge 7377 and sic le 7377) or ( sic ge 7378 and sic le 7378) or ( sic ge 7379 and sic le 7379) or ( sic ge 7380 and sic le 7380) or ( sic ge 7381 and sic le 7382) or ( sic ge 7383 and sic le 7383) or ( sic ge 7384 and sic le 7384) or ( sic ge 7385 and sic le 7385) or ( sic ge 7389 and sic le 7390) or ( sic ge 7391 and sic le 7391) or ( sic ge 7392 and sic le 7392) or ( sic ge 7393 and sic le 7393) or ( sic ge 7394 and sic le 7394) or ( sic ge 7396 and sic le 7396) or ( sic ge 7397 and sic le 7397) or ( sic ge 7399 and sic le 7399) or ( sic ge 7519 and sic le 7519) or ( sic ge 8700 and sic le 8700) or ( sic ge 8710 and sic le 8713) or ( sic ge 8720 and sic le 8721) or ( sic ge 8730 and sic le 8734) or ( sic ge 8740 and sic le 8748) or ( sic ge 8900 and sic le 8910) or ( sic ge 8911 and sic le 8911) or ( sic ge 8920 and sic le 8999) or ( sic ge 4220 and sic le 4229) then ff48=34;
	if ( sic ge 3570 and sic le 3579) or ( sic ge 3680 and sic le 3680) or ( sic ge 3681 and sic le 3681) or ( sic ge 3682 and sic le 3682) or ( sic ge 3683 and sic le 3683) or ( sic ge 3684 and sic le 3684) or ( sic ge 3685 and sic le 3685) or ( sic ge 3686 and sic le 3686) or ( sic ge 3687 and sic le 3687) or ( sic ge 3688 and sic le 3688) or ( sic ge 3689 and sic le 3689) or ( sic ge 3695 and sic le 3695) or ( sic ge 7373 and sic le 7373) then ff48=35;
	if ( sic ge 3622 and sic le 3622) or ( sic ge 3661 and sic le 3661) or ( sic ge 3662 and sic le 3662) or ( sic ge 3663 and sic le 3663) or ( sic ge 3664 and sic le 3664) or ( sic ge 3665 and sic le 3665) or ( sic ge 3666 and sic le 3666) or ( sic ge 3669 and sic le 3669) or ( sic ge 3670 and sic le 3679) or ( sic ge 3810 and sic le 3810) or ( sic ge 3812 and sic le 3812) then ff48=36;
	if ( sic ge 3811 and sic le 3811) or ( sic ge 3820 and sic le 3820) or ( sic ge 3821 and sic le 3821) or ( sic ge 3822 and sic le 3822) or ( sic ge 3823 and sic le 3823) or ( sic ge 3824 and sic le 3824) or ( sic ge 3825 and sic le 3825) or ( sic ge 3826 and sic le 3826) or ( sic ge 3827 and sic le 3827) or ( sic ge 3829 and sic le 3829) or ( sic ge 3830 and sic le 3839) then ff48=37;
	if ( sic ge 2520 and sic le 2549) or ( sic ge 2600 and sic le 2639) or ( sic ge 2670 and sic le 2699) or ( sic ge 2760 and sic le 2761) or ( sic ge 3950 and sic le 3955) then ff48=38;
	if ( sic ge 2440 and sic le 2449) or ( sic ge 2640 and sic le 2659) or ( sic ge 3220 and sic le 3221) or ( sic ge 3410 and sic le 3412) then ff48=39;
	if ( sic ge 4000 and sic le 4013) or ( sic ge 4040 and sic le 4049) or ( sic ge 4100 and sic le 4100) or ( sic ge 4110 and sic le 4119) or ( sic ge 4120 and sic le 4121) or ( sic ge 4130 and sic le 4131) or ( sic ge 4140 and sic le 4142) or ( sic ge 4150 and sic le 4151) or ( sic ge 4170 and sic le 4173) or ( sic ge 4190 and sic le 4199) or ( sic ge 4200 and sic le 4200) or ( sic ge 4210 and sic le 4219) or ( sic ge 4230 and sic le 4231) or ( sic ge 4240 and sic le 4249) or ( sic ge 4400 and sic le 4499) or ( sic ge 4500 and sic le 4599) or ( sic ge 4600 and sic le 4699) or ( sic ge 4700 and sic le 4700) or ( sic ge 4710 and sic le 4712) or ( sic ge 4720 and sic le 4729) or ( sic ge 4730 and sic le 4739) or ( sic ge 4740 and sic le 4749) or ( sic ge 4780 and sic le 4780) or ( sic ge 4782 and sic le 4782) or ( sic ge 4783 and sic le 4783) or ( sic ge 4784 and sic le 4784) or ( sic ge 4785 and sic le 4785) or ( sic ge 4789 and sic le 4789) then ff48=40;
	if ( sic ge 5000 and sic le 5000) or ( sic ge 5010 and sic le 5015) or ( sic ge 5020 and sic le 5023) or ( sic ge 5030 and sic le 5039) or ( sic ge 5040 and sic le 5042) or ( sic ge 5043 and sic le 5043) or ( sic ge 5044 and sic le 5044) or ( sic ge 5045 and sic le 5045) or ( sic ge 5046 and sic le 5046) or ( sic ge 5047 and sic le 5047) or ( sic ge 5048 and sic le 5048) or ( sic ge 5049 and sic le 5049) or ( sic ge 5050 and sic le 5059) or ( sic ge 5060 and sic le 5060) or ( sic ge 5063 and sic le 5063) or ( sic ge 5064 and sic le 5064) or ( sic ge 5065 and sic le 5065) or ( sic ge 5070 and sic le 5078) or ( sic ge 5080 and sic le 5080) or ( sic ge 5081 and sic le 5081) or ( sic ge 5082 and sic le 5082) or ( sic ge 5083 and sic le 5083) or ( sic ge 5084 and sic le 5084) or ( sic ge 5085 and sic le 5085) or ( sic ge 5086 and sic le 5087) or ( sic ge 5088 and sic le 5088) or ( sic ge 5090 and sic le 5090) or ( sic ge 5091 and sic le 5092) or ( sic ge 5093 and sic le 5093) or ( sic ge 5094 and sic le 5094) or ( sic ge 5099 and sic le 5099) or ( sic ge 5100 and sic le 5100) or ( sic ge 5110 and sic le 5113) or ( sic ge 5120 and sic le 5122) or ( sic ge 5130 and sic le 5139) or ( sic ge 5140 and sic le 5149) or ( sic ge 5150 and sic le 5159) or ( sic ge 5160 and sic le 5169) or ( sic ge 5170 and sic le 5172) or ( sic ge 5180 and sic le 5182) or ( sic ge 5190 and sic le 5199) then ff48=41;
	if ( sic ge 5200 and sic le 5200) or ( sic ge 5210 and sic le 5219) or ( sic ge 5220 and sic le 5229) or ( sic ge 5230 and sic le 5231) or ( sic ge 5250 and sic le 5251) or ( sic ge 5260 and sic le 5261) or ( sic ge 5270 and sic le 5271) or ( sic ge 5300 and sic le 5300) or ( sic ge 5310 and sic le 5311) or ( sic ge 5320 and sic le 5320) or ( sic ge 5330 and sic le 5331) or ( sic ge 5334 and sic le 5334) or ( sic ge 5340 and sic le 5349) or ( sic ge 5390 and sic le 5399) or ( sic ge 5400 and sic le 5400) or ( sic ge 5410 and sic le 5411) or ( sic ge 5412 and sic le 5412) or ( sic ge 5420 and sic le 5429) or ( sic ge 5430 and sic le 5439) or ( sic ge 5440 and sic le 5449) or ( sic ge 5450 and sic le 5459) or ( sic ge 5460 and sic le 5469) or ( sic ge 5490 and sic le 5499) or ( sic ge 5500 and sic le 5500) or ( sic ge 5510 and sic le 5529) or ( sic ge 5530 and sic le 5539) or ( sic ge 5540 and sic le 5549) or ( sic ge 5550 and sic le 5559) or ( sic ge 5560 and sic le 5569) or ( sic ge 5570 and sic le 5579) or ( sic ge 5590 and sic le 5599) or ( sic ge 5600 and sic le 5699) or ( sic ge 5700 and sic le 5700) or ( sic ge 5710 and sic le 5719) or ( sic ge 5720 and sic le 5722) or ( sic ge 5730 and sic le 5733) or ( sic ge 5734 and sic le 5734) or ( sic ge 5735 and sic le 5735) or ( sic ge 5736 and sic le 5736) or ( sic ge 5750 and sic le 5799) or ( sic ge 5900 and sic le 5900) or ( sic ge 5910 and sic le 5912) or ( sic ge 5920 and sic le 5929) or ( sic ge 5930 and sic le 5932) or ( sic ge 5940 and sic le 5940) or ( sic ge 5941 and sic le 5941) or ( sic ge 5942 and sic le 5942) or ( sic ge 5943 and sic le 5943) or ( sic ge 5944 and sic le 5944) or ( sic ge 5945 and sic le 5945) or ( sic ge 5946 and sic le 5946) or ( sic ge 5947 and sic le 5947) or ( sic ge 5948 and sic le 5948) or ( sic ge 5949 and sic le 5949) or ( sic ge 5950 and sic le 5959) or ( sic ge 5960 and sic le 5969) or ( sic ge 5970 and sic le 5979) or ( sic ge 5980 and sic le 5989) or ( sic ge 5990 and sic le 5990) or ( sic ge 5992 and sic le 5992) or ( sic ge 5993 and sic le 5993) or ( sic ge 5994 and sic le 5994) or ( sic ge 5995 and sic le 5995) or ( sic ge 5999 and sic le 5999) then ff48=42;
	if ( sic ge 5800 and sic le 5819) or ( sic ge 5820 and sic le 5829) or ( sic ge 5890 and sic le 5899) or ( sic ge 7000 and sic le 7000) or ( sic ge 7010 and sic le 7019) or ( sic ge 7040 and sic le 7049) or ( sic ge 7213 and sic le 7213) then ff48=43;
	if ( sic ge 6000 and sic le 6000) or ( sic ge 6010 and sic le 6019) or ( sic ge 6020 and sic le 6020) or ( sic ge 6021 and sic le 6021) or ( sic ge 6022 and sic le 6022) or ( sic ge 6023 and sic le 6024) or ( sic ge 6025 and sic le 6025) or ( sic ge 6026 and sic le 6026) or ( sic ge 6027 and sic le 6027) or ( sic ge 6028 and sic le 6029) or ( sic ge 6030 and sic le 6036) or ( sic ge 6040 and sic le 6059) or ( sic ge 6060 and sic le 6062) or ( sic ge 6080 and sic le 6082) or ( sic ge 6090 and sic le 6099) or ( sic ge 6100 and sic le 6100) or ( sic ge 6110 and sic le 6111) or ( sic ge 6112 and sic le 6113) or ( sic ge 6120 and sic le 6129) or ( sic ge 6130 and sic le 6139) or ( sic ge 6140 and sic le 6149) or ( sic ge 6150 and sic le 6159) or ( sic ge 6160 and sic le 6169) or ( sic ge 6170 and sic le 6179) or ( sic ge 6190 and sic le 6199) then ff48=44;
	if ( sic ge 6300 and sic le 6300) or ( sic ge 6310 and sic le 6319) or ( sic ge 6320 and sic le 6329) or ( sic ge 6330 and sic le 6331) or ( sic ge 6350 and sic le 6351) or ( sic ge 6360 and sic le 6361) or ( sic ge 6370 and sic le 6379) or ( sic ge 6390 and sic le 6399) or ( sic ge 6400 and sic le 6411) then ff48=45;
	if ( sic ge 6500 and sic le 6500) or ( sic ge 6510 and sic le 6510) or ( sic ge 6512 and sic le 6512) or ( sic ge 6513 and sic le 6513) or ( sic ge 6514 and sic le 6514) or ( sic ge 6515 and sic le 6515) or ( sic ge 6517 and sic le 6519) or ( sic ge 6520 and sic le 6529) or ( sic ge 6530 and sic le 6531) or ( sic ge 6532 and sic le 6532) or ( sic ge 6540 and sic le 6541) or ( sic ge 6550 and sic le 6553) or ( sic ge 6590 and sic le 6599) or ( sic ge 6610 and sic le 6611) then ff48=46;
	if ( sic ge 6200 and sic le 6299) or ( sic ge 6700 and sic le 6700) or ( sic ge 6710 and sic le 6719) or ( sic ge 6720 and sic le 6722) or ( sic ge 6723 and sic le 6723) or ( sic ge 6724 and sic le 6724) or ( sic ge 6725 and sic le 6725) or ( sic ge 6726 and sic le 6726) or ( sic ge 6730 and sic le 6733) or ( sic ge 6740 and sic le 6779) or ( sic ge 6790 and sic le 6791) or ( sic ge 6792 and sic le 6792) or ( sic ge 6793 and sic le 6793) or ( sic ge 6794 and sic le 6794) or ( sic ge 6795 and sic le 6795) or ( sic ge 6798 and sic le 6798) or ( sic ge 6799 and sic le 6799) then ff48=47;
	if ( sic ge 4950 and sic le 4959) or ( sic ge 4960 and sic le 4961) or ( sic ge 4970 and sic le 4971) or ( sic ge 4990 and sic le 4991) then ff48=48;

	run;

***************************************************************
* Excluding microcap stocks 
***************************************************************;

proc sql;
  create table asie.ann_fund8a as
  select a.*,b.*
    from asie.ann_fund8 as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.ann_fund8b;
set asie.ann_fund8a;
if abs(prc) ge 5;
run;

proc sort data=asie.ann_fund8b;
by year l_size;
run;

data asie.ann_fund8c;
set asie.ann_fund8b;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.ann_fund8c;
tables r_var;
run;

data asie.ann_fund8d;
set asie.ann_fund8c;
if r_var ge 2;
run;


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars= SALEGrowth_1_0 SALEGrowth_2_0 SALEGrowth_3_0
           OIBDPGrowth_1_0 OIBDPGrowth_2_0 OIBDPGrowth_3_0
           
           SALE_lATGrowth_1_0 SALE_lATGrowth_2_0 SALE_lATGrowth_3_0
           OIBDP_lATGrowth_1_0 OIBDP_lATGrowth_2_0 OIBDP_lATGrowth_3_0;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.ann_fund8d; by year; run;

proc means data=asie.ann_fund8d min p1 p99 max median mean n noprint;
by year;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.ann_fund8d as a left join asie.outliers as b
on a.year=b.year;
quit;

data asie.ann_fund8d; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.ann_fund8d; by permno year; run;

data asie.ann_fund12; set asie.ann_fund8d; run;


/****************************************************/
/** CODE 7: Code_SimpleCount_Base_Mentioning **/
/****************************************************/

/** work on the base mention file **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count1a;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
n_mentionedticker=filingticker;
n_filingticker=mentionedticker;
drop filingticker mentionedticker;
run;

data asie.simple_count2;
set asie.simple_count1a;
mentionedticker=n_mentionedticker;
filingticker=n_filingticker;
drop n_filingticker n_mentionedticker;
proc print;
run;

/** add GIC **/

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.firm_gic as b
    on a.filingticker=b.ticker 
    and a.byear=b.fyear
	and a.bmonth=b.fmonth;
 run;

data asie.simple_count2_b;
set asie.simple_count2_a;
f_gic=gic;
keep filingticker mentionedticker byear bmonth f_gic;
run;

proc sql;
  create table asie.simple_count2_c as
  select a.*,b.*
    from asie.simple_count2_b as a left join asie.firm_gic as b
    on a.mentionedticker=b.ticker 
    and a.byear=b.fyear
	and a.bmonth=b.fmonth;
 run;

data asie.simple_count2_d;
set asie.simple_count2_c;
m_gic=gic;
keep filingticker mentionedticker byear bmonth f_gic m_gic;
run;

proc sort data=asie.simple_count2_d;
by filingticker byear bmonth mentionedticker;
run;

data asie.simple_count2_inter;
set asie.simple_count2_d;
if f_gic ne . and m_gic ne . and f_gic ne m_gic;
proc print;
run;

data asie.simple_count2_in;
set asie.simple_count2_d;
if f_gic ne . and m_gic ne . and f_gic=m_gic;
proc print;
run;

/** count only the mentioned firms; all firms: 'simple_count2', outsdie-industry: 'simple_count2_inter', inside-industry: 'simple_count2_in'**/

proc sort data=asie.simple_count2;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

/** get mean and sum of firm size of the mentioning firms **/

data asie.main_file1;
set asie.firm_characteristics;
if year ge 1994;
keep tic year month l_size;
run;

proc sql;
  create table asie.simple_count2_size1 as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.main_file1 as b 
    on a.filingticker=b.tic 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

proc sort data=asie.simple_count2_size1 nodupkey;
by mentionedticker filingticker byear bmonth;
run;

proc sort data=asie.simple_count2_size1;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

proc means data=asie.simple_count2_size1 noprint;
by mentionedticker byear bmonth;
var l_size; 
output out=asie.simple_size3 n=size_count sum=sum_size max=max_size min=min_size;
proc print data=asie.simple_size3;
run;

proc sql;
  create table asie.simple_count3a as
  select a.*,b.*
    from asie.simple_count3 as a left join asie.simple_size3 as b
    on a.mentionedticker=b.mentionedticker 
    and a.byear=b.byear
	and a.bmonth=b.bmonth;
 run;

/*************************************/

data asie.mentioned_firms1;
set asie.simple_count3a;
keep mentionedticker;
run;

/** list of all mentioned firms **/

proc sort data=asie.mentioned_firms1 nodupkey;
by mentionedticker;
run;

/** add all months to all mentioned firms **/

proc print data=asie.months1;
run;

proc sql;
  create table asie.mentioned_firms2 as
  select a.*,b.*
    from asie.months1 as a, asie.mentioned_firms1 as b;
 run;

proc sort data=asie.mentioned_firms2;
by mentionedticker year month;
run;

/** merge the file with the mentiones to the file with all mentioned firms in all months **/

proc sql;
  create table asie.simple_count4 as
  select a.*,b.*
    from asie.mentioned_firms2 as a left join asie.simple_count3a as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear
	and a.month=b.bmonth;
 run;

data asie.simple_count5;
set asie.simple_count4;
if mention_count=. then mention_count=0;
if size_count=. then size_count=0;
if sum_size= . then sum_size=0;
/**if max_size =. then max_size=0;
if min_size =. then min_size=0;**/
run;

/** sum all mentions in the last 12 months **/

proc sort data=asie.simple_count5;
by mentionedticker year month;
run;

data asie.simple_count6;
set asie.simple_count5;
l11mt=lag11(mentionedticker);

lag1mc=lag(mention_count);
lag2mc=lag2(mention_count);
lag3mc=lag3(mention_count);
lag4mc=lag4(mention_count);
lag5mc=lag5(mention_count);
lag6mc=lag6(mention_count);
lag7mc=lag7(mention_count);
lag8mc=lag8(mention_count);
lag9mc=lag9(mention_count);
lag10mc=lag10(mention_count);
lag11mc=lag11(mention_count);

lag1sc=lag(size_count);
lag2sc=lag2(size_count);
lag3sc=lag3(size_count);
lag4sc=lag4(size_count);
lag5sc=lag5(size_count);
lag6sc=lag6(size_count);
lag7sc=lag7(size_count);
lag8sc=lag8(size_count);
lag9sc=lag9(size_count);
lag10sc=lag10(size_count);
lag11sc=lag11(size_count);

lag1s_size=lag(sum_size);
lag2s_size=lag2(sum_size);
lag3s_size=lag3(sum_size);
lag4s_size=lag4(sum_size);
lag5s_size=lag5(sum_size);
lag6s_size=lag6(sum_size);
lag7s_size=lag7(sum_size);
lag8s_size=lag8(sum_size);
lag9s_size=lag9(sum_size);
lag10s_size=lag10(sum_size);
lag11s_size=lag11(sum_size);

lag1max_size=lag(max_size);
lag2max_size=lag2(max_size);
lag3max_size=lag3(max_size);
lag4max_size=lag4(max_size);
lag5max_size=lag5(max_size);
lag6max_size=lag6(max_size);
lag7max_size=lag7(max_size);
lag8max_size=lag8(max_size);
lag9max_size=lag9(max_size);
lag10max_size=lag10(max_size);
lag11max_size=lag11(max_size);

lag1min_size=lag(min_size);
lag2min_size=lag2(min_size);
lag3min_size=lag3(min_size);
lag4min_size=lag4(min_size);
lag5min_size=lag5(min_size);
lag6min_size=lag6(min_size);
lag7min_size=lag7(min_size);
lag8min_size=lag8(min_size);
lag9min_size=lag9(min_size);
lag10min_size=lag10(min_size);
lag11min_size=lag11(min_size);

if l11mt=mentionedticker then do;
simple_count=mention_count+lag1mc+lag2mc+lag3mc+lag4mc+lag5mc+lag6mc+lag7mc+lag8mc+lag9mc+lag10mc+lag11mc;
simple_size_count=size_count+lag1sc+lag2sc+lag3sc+lag4sc+lag5sc+lag6sc+lag7sc+lag8sc+lag9sc+lag10sc+lag11sc;
simple_sum_size=sum_size+lag1s_size+lag2s_size+lag3s_size+lag4s_size+lag5s_size+lag6s_size+lag7s_size+lag8s_size+lag9s_size+lag10s_size+lag11s_size;
simple_mean_size=simple_sum_size/simple_size_count;
simple_max_size=max(max_size,lag1max_size,lag2max_size,lag3max_size,lag4max_size,lag5max_size,lag6max_size,lag7max_size,lag8max_size,lag9max_size,lag10max_size,lag11max_size);
simple_min_size=min(min_size,lag1min_size,lag2min_size,lag3min_size,lag4min_size,lag5min_size,lag6min_size,lag7min_size,lag8min_size,lag9min_size,lag10min_size,lag11min_size);
end;
drop byear bmonth _TYPE_ _FREQ_ l11mt lag1mc lag2mc lag3mc lag4mc lag5mc lag6mc lag7mc lag8mc lag9mc lag10mc lag11mc
     l11mt lag1sc lag2sc lag3sc lag4sc lag5sc lag6sc lag7sc lag8sc lag9sc lag10sc lag11sc
     l11mt lag1s_size lag2s_size lag3s_size lag4s_size lag5s_size lag6s_size lag7s_size lag8s_size lag9s_size lag10s_size lag11s_size
     lag1max_size lag2max_size lag3max_size lag4max_size lag5max_size lag6max_size lag7max_size lag8max_size lag9max_size lag10max_size lag11max_size
     lag1min_size lag2min_size lag3min_size lag4min_size lag5min_size lag6min_size lag7min_size lag8min_size lag9min_size lag10min_size lag11min_size;
run;

/** generate lags in simple counts **/

proc sort data=asie.simple_count6;
by mentionedticker year month;
run;

data asie.simple_count7_mentioning; /** change to simple_count7_inter and simple_count7_in **/
set asie.simple_count6;
 lt=lag3(mentionedticker);

 lag3simple_count=lag3(simple_count);
 lag3simple_sum_size=lag3(simple_sum_size);
 lag3simple_mean_size=lag3(simple_mean_size);
 lag3simple_max_size=lag3(simple_max_size);
 lag3simple_min_size=lag3(simple_min_size);
 lag3simple_size_count=lag3(simple_size_count);
 
 if lt=mentionedticker then do;

 l3simple_count=lag3simple_count;
 l3simple_sum_size=lag3simple_sum_size;
 l3simple_mean_size=lag3simple_mean_size;
 l3simple_max_size=lag3simple_max_size;
 l3simple_min_size=lag3simple_min_size;
 l3simple_size_count=lag3simple_size_count;
 
end;
drop lag3simple_count lag3simple_sum_size lag3simple_mean_size lag3simple_max_size lag3simple_min_size lag3simple_size_count;
run;


/****************************************************/
/** CODE 8: Code_DoubleSort_SharedAnalysts **/
/****************************************************/

%let wrds = wrds.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

/** get analyst coverage **/

rsubmit;
data det_XEPSUS; 
set ibes.DET_XEPSUS; 
run;

rsubmit;
data DET_XEPSUS2;
set DET_XEPSUS;
year_rev=year(REVDATS);
month_rev=month(REVDATS);
keep TICKER ANALYS REVDATS year_rev month_rev fpi;
if year_rev ge 1994;
run;

rsubmit;
proc sort data=DET_XEPSUS2 nodupkey;
by analys year_rev month_rev ticker;
run; 

rsubmit;
proc contents data=DET_XEPSUS2;
run;

libname asie 'd:\sas files';
run;

rsubmit;
proc upload data=asie.iclink_new out=iclink_new; 
run;

rsubmit;
proc sql; 
create table DET_XEPSUS3 
as select a.*, b.*
from DET_XEPSUS2 as a left join iclink_new as b
on a.ticker=b.ticker;
run;

rsubmit;
proc sort data=DET_XEPSUS3 nodupkey;
by analys year_rev month_rev ticker;
run; 

rsubmit;
proc download data=DET_XEPSUS3 out=asie.ibes_analyst; 
run;

/** generate analyst coverage conenction file **/

data asie.ibes_analyst1;
set asie.ibes_analyst;
keep analys permno ticker year_rev month_rev;
run;

data asie.ibes_analyst_share1;
set asie.ibes_analyst1;
if year_rev ge 1995 and year_rev le 2017;
run;

proc sort data=asie.ibes_analyst_share1 nodupkey;
by analys year_rev ticker;
run; 

data asie.ibes_analyst_share2;
set asie.ibes_analyst_share1;
ticker1=ticker;
keep ticker1 analys year_rev;
run;

data asie.ibes_analyst_share3;
set asie.ibes_analyst_share1;
ticker2=ticker;
keep ticker2 analys year_rev;
run;

proc sql;
  create table asie.ibes_analyst_share4 as
  select a.*,b.*
    from asie.ibes_analyst_share2 as a left join asie.ibes_analyst_share3 as b
    on a.analys=b.analys 
    and a.year_rev=b.year_rev;
 run;

proc sort data=asie.ibes_analyst_share4;
by year_rev ticker1 analys ticker2;
run; 

proc sort data=asie.ibes_analyst_share4;
by ticker1 ticker2 year_rev;
run;

proc means data=asie.ibes_analyst_share4 noprint;
by ticker1 ticker2 year_rev;
var year_rev;
output out=asie.shared_analyst1 n=count;
run;

data asie.shared_analyst2;
set asie.shared_analyst1;
if ticker1 ne ticker2;
run;

/** get the permno of the shared analyst firm **/

data asie.permno_cusip_gvkey_tic_sic_2;
set asie.permno_cusip_gvkey_tic_sic;
year_p=year(date);
month_p=month(date);
day=1;
date_p=MDY(Month_p, day, year_p);
format date_p monyy7.;
if year_p ge 1994 and year_p le 2017;
run;

proc sort data=asie.permno_cusip_gvkey_tic_sic_2;
by permno year_p month_p;
run;

data asie.aaa;
set asie.shared_analyst2;
if year_rev=1995;
run;

data asie.bbb;
set asie.permno_cusip_gvkey_tic_sic_2;
if year_p=1995;
run;

proc sql;
  create table asie.shared_analyst3 as
  select a.*,b.*
    from asie.permno_cusip_gvkey_tic_sic_2 as a left join asie.shared_analyst2 as b
    on a.tic=b.ticker2
	and a.year_p=b.year_rev;
 run; 

data asie.shared_analyst4;
set asie.shared_analyst3;
if count ne .;
run;


/** get monthly returns of the shared analyst firms **/

%let wrds = wrds-cloud.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;
data monthly_ret;
set crsp.msf;
run;

rsubmit;
data monthly_ret2;
set monthly_ret;
year=year(date);
month=month(date);
if year ge 1994 and year le 2017;
run;

rsubmit;
proc sort data=monthly_ret2;
by permno date;
run;

rsubmit;
data monthly_ret3;
set monthly_ret2;
lp=lag(permno);
lagret=lag(ret);
if lp=permno then do;
lret=lagret;
end;
keep permno date year month ret lret;
run;

rsubmit;
data monthly_ret4;
set monthly_ret3;
c2_permno=permno;
drop permno;
run;

rsubmit;
proc download data=monthly_ret4 out=asie.monthly_ret4;
run;

data asie.shared_analyst4_95;
set asie.shared_analyst4;
if year_p=1995;
run;

data asie.shared_analyst4_96;
set asie.shared_analyst4;
if year_p=1996;
run;

data asie.shared_analyst4_97;
set asie.shared_analyst4;
if year_p=1997;
run;

data asie.shared_analyst4_98;
set asie.shared_analyst4;
if year_p=1998;
run;

data asie.shared_analyst4_99;
set asie.shared_analyst4;
if year_p=1999;
run;

data asie.shared_analyst4_00;
set asie.shared_analyst4;
if year_p=2000;
run;

data asie.shared_analyst4_01;
set asie.shared_analyst4;
if year_p=2001;
run;

data asie.shared_analyst4_02;
set asie.shared_analyst4;
if year_p=2002;
run;

data asie.shared_analyst4_03;
set asie.shared_analyst4;
if year_p=2003;
run;

data asie.shared_analyst4_04;
set asie.shared_analyst4;
if year_p=2004;
run;

data asie.shared_analyst4_05;
set asie.shared_analyst4;
if year_p=2005;
run;

data asie.shared_analyst4_06;
set asie.shared_analyst4;
if year_p=2006;
run;

data asie.shared_analyst4_07;
set asie.shared_analyst4;
if year_p=2007;
run;

data asie.shared_analyst4_08;
set asie.shared_analyst4;
if year_p=2008;
run;

data asie.shared_analyst4_09;
set asie.shared_analyst4;
if year_p=2009;
run;

data asie.shared_analyst4_10;
set asie.shared_analyst4;
if year_p=2010;
run;

data asie.shared_analyst4_11;
set asie.shared_analyst4;
if year_p=2011;
run;

data asie.shared_analyst4_12;
set asie.shared_analyst4;
if year_p=2012;
run;

data asie.shared_analyst4_13;
set asie.shared_analyst4;
if year_p=2013;
run;

data asie.shared_analyst4_14;
set asie.shared_analyst4;
if year_p=2014;
run;

data asie.shared_analyst4_15;
set asie.shared_analyst4;
if year_p=2015;
run;

data asie.shared_analyst4_16;
set asie.shared_analyst4;
if year_p=2016;
run;

data asie.shared_analyst4_17;
set asie.shared_analyst4;
if year_p=2017;
run;


proc sql;
  create table asie.shared_analyst5_95 as
  select a.*,b.*
    from asie.shared_analyst4_95 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_96 as
  select a.*,b.*
    from asie.shared_analyst4_96 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_97 as
  select a.*,b.*
    from asie.shared_analyst4_97 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_98 as
  select a.*,b.*
    from asie.shared_analyst4_98 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_99 as
  select a.*,b.*
    from asie.shared_analyst4_99 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_00 as
  select a.*,b.*
    from asie.shared_analyst4_00 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_01 as
  select a.*,b.*
    from asie.shared_analyst4_01 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_02 as
  select a.*,b.*
    from asie.shared_analyst4_02 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_03 as
  select a.*,b.*
    from asie.shared_analyst4_03 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_04 as
  select a.*,b.*
    from asie.shared_analyst4_04 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_05 as
  select a.*,b.*
    from asie.shared_analyst4_05 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_06 as
  select a.*,b.*
    from asie.shared_analyst4_06 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_07 as
  select a.*,b.*
    from asie.shared_analyst4_07 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_08 as
  select a.*,b.*
    from asie.shared_analyst4_08 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_09 as
  select a.*,b.*
    from asie.shared_analyst4_09 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_10 as
  select a.*,b.*
    from asie.shared_analyst4_10 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_11 as
  select a.*,b.*
    from asie.shared_analyst4_11 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_12 as
  select a.*,b.*
    from asie.shared_analyst4_12 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_13 as
  select a.*,b.*
    from asie.shared_analyst4_13 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_14 as
  select a.*,b.*
    from asie.shared_analyst4_14 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_15 as
  select a.*,b.*
    from asie.shared_analyst4_15 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_16 as
  select a.*,b.*
    from asie.shared_analyst4_16 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 

 proc sql;
  create table asie.shared_analyst5_17 as
  select a.*,b.*
    from asie.shared_analyst4_17 as a left join asie.monthly_ret4 as b
    on a.permno=b.c2_permno
	and a.year_p=b.year
    and a.month_p=b.month;
 run; 


proc sort data=asie.shared_analyst5_95;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_95 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_95_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_96;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_96 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_96_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_97;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_97 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_97_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_98;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_98 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_98_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_99;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_99 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_99_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_00;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_00 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_00_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_01;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_01 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_01_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_02;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_02 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_02_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_03;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_03 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_03_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_04;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_04 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_04_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_05;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_05 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_05_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_06;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_06 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_06_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_07;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_07 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_07_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_08;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_08 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_08_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_09;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_09 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_09_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_10;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_10 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_10_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_11;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_11 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_11_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_12;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_12 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_12_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_13;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_13 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_13_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_14;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_14 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_14_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_15;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_15 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_15_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_16;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_16 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_16_a mean=sa_lret;
run;

proc sort data=asie.shared_analyst5_17;
by ticker1 year_p month_p ticker2;
run;

proc means data=asie.shared_analyst5_17 noprint;
by ticker1 year_p month_p;
var lret/weight=count;
output out=asie.shared_analyst5_17_a mean=sa_lret;
run;


data asie.shared_analyst6;
set asie.shared_analyst5_95_a asie.shared_analyst5_96_a asie.shared_analyst5_97_a asie.shared_analyst5_98_a asie.shared_analyst5_99_a
    asie.shared_analyst5_00_a asie.shared_analyst5_01_a asie.shared_analyst5_02_a asie.shared_analyst5_03_a asie.shared_analyst5_04_a
	asie.shared_analyst5_05_a asie.shared_analyst5_06_a asie.shared_analyst5_07_a asie.shared_analyst5_08_a asie.shared_analyst5_09_a
	asie.shared_analyst5_10_a asie.shared_analyst5_11_a asie.shared_analyst5_12_a asie.shared_analyst5_13_a asie.shared_analyst5_14_a
	asie.shared_analyst5_15_a asie.shared_analyst5_16_a asie.shared_analyst5_17_a;
run;

/** merge the files with the mention count file **/

data asie.qf6_full_d_1;
set asie.qf6_full_d; /** this file is from tne code of TABLE 3 **/
run;

proc sort data=asie.qf6_full_d_1;
by mentionedticker year month;
run;

proc sql;
  create table asie.qf6_full_shared_analyst as
  select a.*,b.*
    from asie.qf6_full_d_1 as a left join asie.shared_analyst6 as b
    on a.mentionedticker=b.ticker1
	and a.year=b.year_p
    and a.month=b.month_p;
 run; 

data asie.qf6_full_shared_analyst2;
set asie.qf6_full_shared_analyst;
if sa_lret ne .;
run;


***************************************************************
* Fama-MacBeth regressions
***************************************************************;

data asie.simple_count7i;
set asie.qf6_full_shared_analyst2; 
if abs(prc) ge 5;
if r_var ge 2;
if l3simple_count ne .;
keep year month mentionedticker ex_ret l_size l_size_x l3simple_count sa_lret l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.simple_count7i; by year month; run;

proc means data=asie.simple_count7i min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.simple_count7i as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.simple_count7i; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.simple_count7i; by mentionedticker year month; run;

data asie.simple_count7j; set asie.simple_count7i; run;


data asie.simple_count7ja;
set asie.simple_count7j;
/**if r_var le 4;
if r_var ge 5;**/ 
run;

data asie.simple_count7jb;
set asie.simple_count7ja; 
/**if l3simple_count > 0;**/ /** this to exclude firms that are not mentioned at all **/
ex_ret100=ex_ret*100;
log_size=log(l_Size);
if Mtbq le 0 and Mtbq ne . then Mtbq=0.01;
if Mtbq ge 30 and Mtbq ne . then Mtbq=30;
log_Mtbq=log(Mtbq);
if l3simple_count ne .;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
run;

data asie.simple_count7jc;
set asie.simple_count7jb;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
run;

proc sort data=asie.simple_count7jc;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.simple_count7jc;
 by year month;
 model ex_ret100=l_sc sa_lret no_mention size_no_mention/**target sc_target**/ log_size log_Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.simple_count7jc;
if ex_ret100 ne .;
if l_sc ne .;
if sa_lret ne .;
if no_mention ne .;
if size_no_mention ne .; 
if log_size ne .;
if log_Mtbq ne .;
if l_Past6_ret ne .;
if Profitability3 ne .;
if Investment3 ne .;
if l_Beta ne .;
if l_Idiovol;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/****************************************************/
/** CODE 9: Overlap_Scherbina_Schluschets **/
/****************************************************/

/** work on the base mention file **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count2;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
run;

/** leave only S&P500 firms **/

data asie.SP500_list_filing;
set asie.SP500_list;
sp_firm_filing=sp_firm;
keep sp_firm_filing;
run;

proc sql;
  create table asie.simple_count2_filing as
  select a.*,b.*
    from asie.simple_count2 as a left join asie.SP500_list_filing as b
    on a.filingticker=b.sp_firm_filing;
 run;

data asie.SP500_list_mentioned;
set asie.SP500_list;
sp_firm_mentioned=sp_firm;
keep sp_firm_mentioned;
run;

proc sql;
  create table asie.simple_count2_a as
  select a.*,b.*
    from asie.simple_count2_filing as a left join asie.SP500_list_mentioned as b
    on a.mentionedticker=b.sp_firm_mentioned;
 run;

data asie.simple_count2_b;
set asie.simple_count2_a;
if sp_firm_filing ne '' and sp_firm_mentioned ne '';
proc print;
run;

/** merge with the Scherbina and Schlusche news linls monthly file **/

proc import datafile='c:\sas files\news_links.tsv' out=asie.news_links dbms=tab replace; guessingrows=32767;
   getnames=yes; 
run;

data asie.news_links1;
set asie.news_links;
year=year(date);
month=month(date);
day=day(date);
run;

data asie.months;
set asie.calendar_months;
yy=year;
mm=month;
dd=day(date);
keep yy mm dd;
if year ge 2011 and year le 2017;
run;

proc sort data=asie.months nodupkey;
by yy mm dd;
run;

proc sql;
  create table asie.news_links2 as
  select a.*,b.*
    from asie.news_links1 as a left join asie.months as b
    on a.year=b.yy
    and a.month=b.mm
    and a.day=b.dd;
 run;

data asie.news_links3;
set asie.news_links2;
if yy ne .;
run;

proc sql;
  create table asie.simple_count2_c as
  select a.*,b.*
    from asie.simple_count2_b as a left join asie.news_links3 as b
    on a.filingticker=b.Centroid_Firm_Ticker
    and a.mentionedticker=b.Connected_Firm_Ticker
    and a.byear=b.year
    and a.bmonth=b.month;
 run;

proc sort data=asie.simple_count2_c nodupkey;
by filingticker mentionedticker byear bmonth;
run;

proc sort data=asie.simple_count2_c;
by filingticker byear bmonth mentionedticker;
run;

data asie.simple_count2_inter;
set asie.simple_count2_c;
if weight = '';
proc print;
run;

data asie.simple_count2_in;
set asie.simple_count2_c;
if weight > 0;
proc print;
run;


/****************************************************/
/** CODE 10: SimpleCountTests_Changes **/
/****************************************************/

data asie.simple_count7a;
set asie.simple_count7_all; 
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with the firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

data asie.simple_count7ca;
set asie.simple_count7c;
if l3simple_count ne .;
run;

proc sort data=asie.simple_count7ca;
by mentionedticker year month;
run;

data asie.simple_count7cb;
set asie.simple_count7ca;
lt=lag12(mentionedticker);
lag_l3simple_count=lag12(l3simple_count);
if lt=mentionedticker then do;
l_l3simple_count=lag_l3simple_count;
end;
run;

data asie.simple_count7cc;
set asie.simple_count7cb;
c_simplecount=l3simple_count-l_l3simple_count;
run;

/** remove microcap cap stocsk **/

proc univariate data=asie.simple_count7cc;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7cc as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc sort data=asie.simple_count7h;
by r_var year month;
run;

proc means data=asie.simple_count7h noprint;
by r_var year month;
var ex_ret/weight=l_size_x; 
output out=asie.simple_count7h_mean mean=size_exret;
proc print;
run;

proc sql;
  create table asie.simple_count7h_a as
  select a.*,b.*
    from asie.simple_count7h as a left join asie.simple_count7h_mean as b 
    on a.r_var=b.r_var
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7h_b;
set asie.simple_count7h_a;
ex_ret_size=ex_ret-size_exret;
run;

proc freq data=asie.simple_count7h_b;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h_b;
if abs(prc) ge 5;
if r_var ge 2;
run;


/***************************/
/** Single sort **/
/***************************/

proc univariate data=asie.simple_count7i;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc freq data=asie.simple_count7i;
tables c_simplecount;
run;

data asie.simple_count7j;
set asie.simple_count7i;
alt_var=c_simplecount;
if alt_var ne .;
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_d;
set asie.simple_count7j;
if c_simplecount<0 then do;
gr_decile=0;
end;
else do;
if c_simplecount=0 then do;
gr_decile=1;
end;
else do;
if c_simplecount>0 then do;
gr_decile=2;
end;
end;
end;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; /** change ex_ret to ex_ret_size for size-decile adjusted return **/
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=3;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

data asie.qf11; 
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


